===================================================================
@@ -228,7 +228,7 @@ _mm_hadds_epi16 (__m128i __A, __m128i __B)
__v4si __C = { 0 }, __D = { 0 };
__C = vec_sum4s ((__v8hi) __A, __C);
__D = vec_sum4s ((__v8hi) __B, __D);
- __C = (__v4si) vec_packs (__D, __C);
+ __C = (__v4si) vec_packs (__C, __D);
return (__m128i) __C;
}
@@ -264,8 +264,8 @@ _mm_hsub_epi32 (__m128i __A, __m128i __B)
{ 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
const __v16qu __Q =
{ 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
- __v4si __C = vec_perm ((__v4si) __B, (__v4si) __A, __P);
- __v4si __D = vec_perm ((__v4si) __B, (__v4si) __A, __Q);
+ __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
+ __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
return (__m128i) vec_sub (__C, __D);
}
@@ -332,7 +332,7 @@ __attribute__((__gnu_inline__, __always_inline__,
_mm_shuffle_epi8 (__m128i __A, __m128i __B)
{
const __v16qi __zero = { 0 };
- __vector __bool char __select = vec_cmplt ((__v16qi) __A, __zero);
+ __vector __bool char __select = vec_cmplt ((__v16qi) __B, __zero);
__v16qi __C = vec_perm ((__v16qi) __A, (__v16qi) __A, (__v16qu) __B);
return (__m128i) vec_sel (__C, __zero, __select);
}
@@ -344,7 +344,7 @@ _mm_shuffle_pi8 (__m64 __A, __m64 __B)
const __v16qi __zero = { 0 };
__v16qi __C = (__v16qi) (__v2du) { __A, __A };
__v16qi __D = (__v16qi) (__v2du) { __B, __B };
- __vector __bool char __select = vec_cmplt ((__v16qi) __C, __zero);
+ __vector __bool char __select = vec_cmplt ((__v16qi) __D, __zero);
__C = vec_perm ((__v16qi) __C, (__v16qi) __C, (__v16qu) __D);
__C = vec_sel (__C, __zero, __select);
return (__m64) ((__v2du) (__C))[0];
@@ -423,11 +423,11 @@ extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16 (__m128i __A, __m128i __B)
{
- __v8hi __C = vec_unpackh ((__v16qi) __A);
- __v8hi __D = vec_unpackl ((__v16qi) __A);
__v8hi __unsigned = vec_splats ((signed short) 0x00ff);
- __v8hi __E = vec_and (vec_unpackh ((__v16qi) __B), __unsigned);
- __v8hi __F = vec_and (vec_unpackl ((__v16qi) __B), __unsigned);
+ __v8hi __C = vec_and (vec_unpackh ((__v16qi) __A), __unsigned);
+ __v8hi __D = vec_and (vec_unpackl ((__v16qi) __A), __unsigned);
+ __v8hi __E = vec_unpackh ((__v16qi) __B);
+ __v8hi __F = vec_unpackl ((__v16qi) __B);
__C = vec_mul (__C, __E);
__D = vec_mul (__D, __F);
const __v16qu __odds =
@@ -445,10 +445,10 @@ _mm_maddubs_pi16 (__m64 __A, __m64 __B)
{
__v8hi __C = (__v8hi) (__v2du) { __A, __A };
__C = vec_unpackl ((__v16qi) __C);
+ const __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
+ __C = vec_and (__C, __unsigned);
__v8hi __D = (__v8hi) (__v2du) { __B, __B };
__D = vec_unpackl ((__v16qi) __D);
- const __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
- __D = vec_and (__D, __unsigned);
__D = vec_mul (__C, __D);
const __v16qu __odds =
{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
===================================================================
@@ -19,24 +19,9 @@ do_test (void)
int
main ()
{
-#ifdef __BUILTIN_CPU_SUPPORTS__
- /* Most SSE intrinsic operations can be implemented via VMX
- instructions, but some operations may be faster / simpler
- using the POWER8 VSX instructions. This is especially true
- when we are transferring / converting to / from __m64 types.
- The direct register transfer instructions from POWER8 are
- especially important. So we test for arch_2_07. */
- if (__builtin_cpu_supports ("arch_2_07"))
- {
- do_test ();
+ do_test ();
#ifdef DEBUG
- printf ("PASSED\n");
+ printf ("PASSED\n");
#endif
- }
-#ifdef DEBUG
- else
- printf ("SKIPPED\n");
-#endif
-#endif /* __BUILTIN_CPU_SUPPORTS__ */
return 0;
}
===================================================================
@@ -18,61 +18,57 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_pabsb (int *i1, int *r)
+ssse3_test_pabsb (__m64 *i1, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- *(__m64 *) r = _mm_abs_pi8 (t1);
+ *r = _mm_abs_pi8 (*i1);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_pabsb128 (int *i1, int *r)
+ssse3_test_pabsb128 (__m128i *i1, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- *(__m128i *) r = _mm_abs_epi8 (t1);
+ *r = _mm_abs_epi8 (*i1);
}
/* Routine to manually compute the results */
static void
-compute_correct_result (int *i1, int *r)
+compute_correct_result (signed char *i1, signed char *r)
{
- char *b1 = (char *) i1;
- char *bout = (char *) r;
int i;
for (i = 0; i < 16; i++)
- if (b1[i] < 0)
- bout[i] = -b1[i];
+ if (i1[i] < 0)
+ r[i] = -i1[i];
else
- bout[i] = b1[i];
+ r[i] = i1[i];
}
static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 4)
+ for (i = 0; i < ARRAY_SIZE (vals); i ++)
{
/* Manually compute the result */
- compute_correct_result(&vals[i + 0], ck);
+ compute_correct_result(&vals[i].b[0], &ck.b[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_pabsb (&vals[i + 0], &r[0]);
- ssse3_test_pabsb (&vals[i + 2], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_pabsb (&vals[i].ll[0], &r.ll[0]);
+ ssse3_test_pabsb (&vals[i].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_pabsb128 (&vals[i + 0], r);
- fail += chk_128 (ck, r);
+ ssse3_test_pabsb128 (&vals[i].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -19,21 +19,19 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_pabsd (int *i1, int *r)
+ssse3_test_pabsd (__m64 *i1, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- *(__m64 *) r = _mm_abs_pi32 (t1);
+ *r = _mm_abs_pi32 (*i1);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_pabsd128 (int *i1, int *r)
+ssse3_test_pabsd128 (__m128i *i1, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- *(__m128i *) r = _mm_abs_epi32 (t1);
+ *r = _mm_abs_epi32 (*i1);
}
/* Routine to manually compute the results */
@@ -53,25 +51,25 @@ static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 4)
+ for (i = 0; i < ARRAY_SIZE (vals); i ++)
{
/* Manually compute the result */
- compute_correct_result(&vals[i + 0], ck);
+ compute_correct_result(&vals[i].w[0], &ck.w[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_pabsd (&vals[i + 0], &r[0]);
- ssse3_test_pabsd (&vals[i + 2], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_pabsd (&vals[i].ll[0], &r.ll[0]);
+ ssse3_test_pabsd (&vals[i].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_pabsd128 (&vals[i + 0], r);
- fail += chk_128(ck, r);
+ ssse3_test_pabsd128 (&vals[i].m[0], &r.m[0]);
+ fail += chk_128(ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -19,61 +19,57 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_pabsw (int *i1, int *r)
+ssse3_test_pabsw (__m64 *i1, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- *(__m64 *) r = _mm_abs_pi16 (t1);
+ *r = _mm_abs_pi16 (*i1);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_pabsw128 (int *i1, int *r)
+ssse3_test_pabsw128 (__m128i *i1, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- *(__m128i *) r = _mm_abs_epi16 (t1);
+ *r = _mm_abs_epi16 (*i1);
}
/* Routine to manually compute the results */
static void
-compute_correct_result (int *i1, int *r)
+compute_correct_result (short *i1, short *r)
{
- short *s1 = (short *) i1;
- short *sout = (short *) r;
int i;
for (i = 0; i < 8; i++)
- if (s1[i] < 0)
- sout[i] = -s1[i];
+ if (i1[i] < 0)
+ r[i] = -i1[i];
else
- sout[i] = s1[i];
+ r[i] = i1[i];
}
static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 4)
+ for (i = 0; i < ARRAY_SIZE (vals); i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], ck);
+ compute_correct_result (&vals[i].h[0], &ck.h[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_pabsw (&vals[i + 0], &r[0]);
- ssse3_test_pabsw (&vals[i + 2], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_pabsw (&vals[i].ll[0], &r.ll[0]);
+ ssse3_test_pabsw (&vals[i].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_pabsw128 (&vals[i + 0], r);
- fail += chk_128 (ck, r);
+ ssse3_test_pabsw128 (&vals[i].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -20,63 +20,60 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
+ssse3_test_palignr (__m64 *i1, __m64 *i2, unsigned int imm, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
-
switch (imm)
{
case 0:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0);
+ *r = _mm_alignr_pi8 (*i1, *i2, 0);
break;
case 1:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1);
+ *r = _mm_alignr_pi8 (*i1, *i2, 1);
break;
case 2:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2);
+ *r = _mm_alignr_pi8 (*i1, *i2, 2);
break;
case 3:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3);
+ *r = _mm_alignr_pi8 (*i1, *i2, 3);
break;
case 4:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4);
+ *r = _mm_alignr_pi8 (*i1, *i2, 4);
break;
case 5:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5);
+ *r = _mm_alignr_pi8 (*i1, *i2, 5);
break;
case 6:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6);
+ *r = _mm_alignr_pi8 (*i1, *i2, 6);
break;
case 7:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7);
+ *r = _mm_alignr_pi8 (*i1, *i2, 7);
break;
case 8:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8);
+ *r = _mm_alignr_pi8 (*i1, *i2, 8);
break;
case 9:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9);
+ *r = _mm_alignr_pi8 (*i1, *i2, 9);
break;
case 10:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10);
+ *r = _mm_alignr_pi8 (*i1, *i2, 10);
break;
case 11:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11);
+ *r = _mm_alignr_pi8 (*i1, *i2, 11);
break;
case 12:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12);
+ *r = _mm_alignr_pi8 (*i1, *i2, 12);
break;
case 13:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13);
+ *r = _mm_alignr_pi8 (*i1, *i2, 13);
break;
case 14:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14);
+ *r = _mm_alignr_pi8 (*i1, *i2, 14);
break;
case 15:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15);
+ *r = _mm_alignr_pi8 (*i1, *i2, 15);
break;
default:
- *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
+ *r = _mm_alignr_pi8 (*i1, *i2, 16);
break;
}
@@ -86,122 +83,120 @@ static void
/* Test the 128-bit form */
static void
-ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
+ssse3_test_palignr128 (__m128i *i1, __m128i *i2, unsigned int imm, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
switch (imm)
{
case 0:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);
+ *r = _mm_alignr_epi8 (*i1, *i2, 0);
break;
case 1:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);
+ *r = _mm_alignr_epi8 (*i1, *i2, 1);
break;
case 2:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);
+ *r = _mm_alignr_epi8 (*i1, *i2, 2);
break;
case 3:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);
+ *r = _mm_alignr_epi8 (*i1, *i2, 3);
break;
case 4:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);
+ *r = _mm_alignr_epi8 (*i1, *i2, 4);
break;
case 5:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);
+ *r = _mm_alignr_epi8 (*i1, *i2, 5);
break;
case 6:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);
+ *r = _mm_alignr_epi8 (*i1, *i2, 6);
break;
case 7:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);
+ *r = _mm_alignr_epi8 (*i1, *i2, 7);
break;
case 8:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);
+ *r = _mm_alignr_epi8 (*i1, *i2, 8);
break;
case 9:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);
+ *r = _mm_alignr_epi8 (*i1, *i2, 9);
break;
case 10:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10);
+ *r = _mm_alignr_epi8 (*i1, *i2, 10);
break;
case 11:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11);
+ *r = _mm_alignr_epi8 (*i1, *i2, 11);
break;
case 12:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12);
+ *r = _mm_alignr_epi8 (*i1, *i2, 12);
break;
case 13:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13);
+ *r = _mm_alignr_epi8 (*i1, *i2, 13);
break;
case 14:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14);
+ *r = _mm_alignr_epi8 (*i1, *i2, 14);
break;
case 15:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15);
+ *r = _mm_alignr_epi8 (*i1, *i2, 15);
break;
case 16:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16);
+ *r = _mm_alignr_epi8 (*i1, *i2, 16);
break;
case 17:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17);
+ *r = _mm_alignr_epi8 (*i1, *i2, 17);
break;
case 18:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18);
+ *r = _mm_alignr_epi8 (*i1, *i2, 18);
break;
case 19:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19);
+ *r = _mm_alignr_epi8 (*i1, *i2, 19);
break;
case 20:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20);
+ *r = _mm_alignr_epi8 (*i1, *i2, 20);
break;
case 21:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21);
+ *r = _mm_alignr_epi8 (*i1, *i2, 21);
break;
case 22:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22);
+ *r = _mm_alignr_epi8 (*i1, *i2, 22);
break;
case 23:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23);
+ *r = _mm_alignr_epi8 (*i1, *i2, 23);
break;
case 24:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24);
+ *r = _mm_alignr_epi8 (*i1, *i2, 24);
break;
case 25:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25);
+ *r = _mm_alignr_epi8 (*i1, *i2, 25);
break;
case 26:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26);
+ *r = _mm_alignr_epi8 (*i1, *i2, 26);
break;
case 27:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27);
+ *r = _mm_alignr_epi8 (*i1, *i2, 27);
break;
case 28:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28);
+ *r = _mm_alignr_epi8 (*i1, *i2, 28);
break;
case 29:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29);
+ *r = _mm_alignr_epi8 (*i1, *i2, 29);
break;
case 30:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30);
+ *r = _mm_alignr_epi8 (*i1, *i2, 30);
break;
case 31:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31);
+ *r = _mm_alignr_epi8 (*i1, *i2, 31);
break;
default:
- *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
+ *r = _mm_alignr_epi8 (*i1, *i2, 32);
break;
}
}
/* Routine to manually compute the results */
static void
-compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
+compute_correct_result_128 (signed char *i1, signed char *i2, unsigned int imm,
+ signed char *r)
{
- char buf [32];
- char *bout = (char *) r;
+ signed char buf [32];
int i;
memcpy (&buf[0], i2, 16);
@@ -209,38 +204,38 @@ static void
for (i = 0; i < 16; i++)
if (imm >= 32 || imm + i >= 32)
- bout[i] = 0;
+ r[i] = 0;
else
- bout[i] = buf[imm + i];
+ r[i] = buf[imm + i];
}
#ifndef __AVX__
static void
-compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
+compute_correct_result_64 (signed char *i1, signed char *i2, unsigned int imm,
+ signed char *r)
{
- char buf [16];
- char *bout = (char *)r;
+ signed char buf [16];
int i;
/* Handle the first half */
- memcpy (&buf[0], i2, 8);
- memcpy (&buf[8], i1, 8);
+ memcpy (&buf[0], &i2[0], 8);
+ memcpy (&buf[8], &i1[0], 8);
for (i = 0; i < 8; i++)
if (imm >= 16 || imm + i >= 16)
- bout[i] = 0;
+ r[i] = 0;
else
- bout[i] = buf[imm + i];
+ r[i] = buf[imm + i];
/* Handle the second half */
- memcpy (&buf[0], &i2[2], 8);
- memcpy (&buf[8], &i1[2], 8);
+ memcpy (&buf[0], &i2[8], 8);
+ memcpy (&buf[8], &i1[8], 8);
for (i = 0; i < 8; i++)
if (imm >= 16 || imm + i >= 16)
- bout[i + 8] = 0;
+ r[i + 8] = 0;
else
- bout[i + 8] = buf[imm + i];
+ r[i + 8] = buf[imm + i];
}
#endif
@@ -248,30 +243,35 @@ static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
unsigned int imm;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
for (imm = 0; imm < 100; imm++)
{
#ifndef __AVX__
/* Manually compute the result */
- compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
+ compute_correct_result_64 (&vals[i + 0].b[0],
+ &vals[i + 1].b[0], imm, &ck.b[0]);
/* Run the 64-bit tests */
- ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
- ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_palignr (&vals[i + 0].ll[0],
+ &vals[i + 1].ll[0], imm, &r.ll[0]);
+ ssse3_test_palignr (&vals[i + 0].ll[1],
+ &vals[i + 1].ll[1], imm, &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Recompute the results for 128-bits */
- compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
+ compute_correct_result_128 (&vals[i + 0].b[0],
+ &vals[i + 1].b[0], imm, &ck.b[0]);
/* Run the 128-bit tests */
- ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
- fail += chk_128 (ck, r);
+ ssse3_test_palignr128 (&vals[i + 0].m[0],
+ &vals[i + 1].m[0], imm, &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -19,23 +19,19 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_phaddd (int *i1, int *i2, int *r)
+ssse3_test_phaddd (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *) r = _mm_hadd_pi32 (t1, t2);
+ *r = _mm_hadd_pi32 (*i1, *i2);
_mm_empty();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_phaddd128 (int *i1, int *i2, int *r)
+ssse3_test_phaddd128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *) r = _mm_hadd_epi32 (t1, t2);
+ *r = _mm_hadd_epi32 (*i1, *i2);
}
/* Routine to manually compute the results */
@@ -54,26 +50,25 @@ static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
-
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].w[0], &vals[i + 1].w[0], &ck.w[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_phaddd (&vals[i + 0], &vals[i + 2], &r[0]);
- ssse3_test_phaddd (&vals[i + 4], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_phaddd (&vals[i + 0].ll[0], &vals[i + 0].ll[1], &r.ll[0]);
+ ssse3_test_phaddd (&vals[i + 1].ll[0], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_phaddd128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_phaddd128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -19,23 +19,19 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_phaddsw (int *i1, int *i2, int *r)
+ssse3_test_phaddsw (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *) r = _mm_hadds_pi16 (t1, t2);
+ *r = _mm_hadds_pi16 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_phaddsw128 (int *i1, int *i2, int *r)
+ssse3_test_phaddsw128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *) r = _mm_hadds_epi16 (t1, t2);
+  *r = _mm_hadds_epi16 (*i1, *i2);
}
static short
@@ -52,42 +48,39 @@ signed_saturate_to_word (int x)
/* Routine to manually compute the results */
static void
-compute_correct_result (int *i1, int *i2, int *r)
+compute_correct_result (short *i1, short *i2, short *r)
{
- short *s1 = (short *) i1;
- short *s2 = (short *) i2;
- short *sout = (short *) r;
int i;
for (i = 0; i < 4; i++)
- sout[i] = signed_saturate_to_word(s1[2 * i] + s1[2 * i + 1]);
+ r[i + 0] = signed_saturate_to_word(i1[2 * i] + i1[2 * i + 1]);
for (i = 0; i < 4; i++)
- sout[i + 4] = signed_saturate_to_word(s2[2 * i] + s2[2 * i + 1]);
+ r[i + 4] = signed_saturate_to_word(i2[2 * i] + i2[2 * i + 1]);
}
static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].h[0], &vals[i + 1].h[0], &ck.h[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_phaddsw (&vals[i + 0], &vals[i + 2], &r[0]);
- ssse3_test_phaddsw (&vals[i + 4], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_phaddsw (&vals[i + 0].ll[0], &vals[i + 0].ll[1], &r.ll[0]);
+ ssse3_test_phaddsw (&vals[i + 1].ll[0], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_phaddsw128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_phaddsw128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -19,64 +19,57 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_phaddw (int *i1, int *i2, int *r)
+ssse3_test_phaddw (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *) r = _mm_hadd_pi16 (t1, t2);
+ *r = _mm_hadd_pi16 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_phaddw128 (int *i1, int *i2, int *r)
+ssse3_test_phaddw128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *) r = _mm_hadd_epi16 (t1, t2);
+ *r = _mm_hadd_epi16 (*i1, *i2);
}
/* Routine to manually compute the results */
static void
-compute_correct_result(int *i1, int *i2, int *r)
+compute_correct_result(short *i1, short *i2, short *r)
{
- short *s1 = (short *) i1;
- short *s2 = (short *) i2;
- short *sout = (short *) r;
int i;
for (i = 0; i < 4; i++)
- sout[i] = s1[2 * i] + s1[2 * i + 1];
+ r[i] = i1[2 * i] + i1[2 * i + 1];
for (i = 0; i < 4; i++)
- sout[i + 4] = s2[2 * i] + s2[2 * i + 1];
+ r[i + 4] = i2[2 * i] + i2[2 * i + 1];
}
static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].h[0], &vals[i + 1].h[0], &ck.h[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_phaddw (&vals[i + 0], &vals[i + 2], &r[0]);
- ssse3_test_phaddw (&vals[i + 4], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_phaddw (&vals[i + 0].ll[0], &vals[i + 0].ll[1], &r.ll[0]);
+ ssse3_test_phaddw (&vals[i + 1].ll[0], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_phaddw128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_phaddw128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -19,23 +19,19 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_phsubd (int *i1, int *i2, int *r)
+ssse3_test_phsubd (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *) r = _mm_hsub_pi32(t1, t2);
+ *r = _mm_hsub_pi32 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_phsubd128 (int *i1, int *i2, int *r)
+ssse3_test_phsubd128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *) r = _mm_hsub_epi32 (t1, t2);
+  *r = _mm_hsub_epi32 (*i1, *i2);
}
/* Routine to manually compute the results */
@@ -54,25 +50,25 @@ static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].w[0], &vals[i + 1].w[0], &ck.w[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_phsubd (&vals[i + 0], &vals[i + 2], &r[0]);
- ssse3_test_phsubd (&vals[i + 4], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_phsubd (&vals[i + 0].ll[0], &vals[i + 0].ll[1], &r.ll[0]);
+ ssse3_test_phsubd (&vals[i + 1].ll[0], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_phsubd128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_phsubd128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -19,25 +19,19 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_phsubsw (int *i1, int *i2, int *r)
+ssse3_test_phsubsw (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
-
- *(__m64 *) r = _mm_hsubs_pi16 (t1, t2);
-
+  *r = _mm_hsubs_pi16 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_phsubsw128 (int *i1, int *i2, int *r)
+ssse3_test_phsubsw128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *) r = _mm_hsubs_epi16 (t1, t2);
+ *r = _mm_hsubs_epi16 (*i1, *i2);
}
static short
@@ -54,43 +48,40 @@ signed_saturate_to_word (int x)
/* Routine to manually compute the results */
static void
-compute_correct_result (int *i1, int *i2, int *r)
+compute_correct_result (short *i1, short *i2, short *r)
{
- short *s1 = (short *) i1;
- short *s2 = (short *) i2;
- short *sout = (short *) r;
int i;
for (i = 0; i < 4; i++)
- sout[i] = signed_saturate_to_word (s1[2 * i] - s1[2 * i + 1]);
+ r[i] = signed_saturate_to_word (i1[2 * i] - i1[2 * i + 1]);
for (i = 0; i < 4; i++)
- sout[i + 4] = signed_saturate_to_word (s2[2 * i] - s2[2 * i + 1]);
+ r[i + 4] = signed_saturate_to_word (i2[2 * i] - i2[2 * i + 1]);
}
static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].h[0], &vals[i + 1].h[0], &ck.h[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_phsubsw (&vals[i + 0], &vals[i + 2], &r[0]);
- ssse3_test_phsubsw (&vals[i + 4], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_phsubsw (&vals[i + 0].ll[0], &vals[i + 0].ll[1], &r.ll[0]);
+ ssse3_test_phsubsw (&vals[i + 1].ll[0], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_phsubsw128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_phsubsw128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -18,64 +18,56 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_phsubw (int *i1, int *i2, int *r)
+ssse3_test_phsubw (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *) r = _mm_hsub_pi16 (t1, t2);
+  *r = _mm_hsub_pi16 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_phsubw128 (int *i1, int *i2, int *r)
+ssse3_test_phsubw128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
-
- *(__m128i *) r = _mm_hsub_epi16 (t1, t2);
+  *r = _mm_hsub_epi16 (*i1, *i2);
}
/* Routine to manually compute the results */
static void
-compute_correct_result (int *i1, int *i2, int *r)
+compute_correct_result (short *i1, short *i2, short *r)
{
- short *s1 = (short *) i1;
- short *s2 = (short *) i2;
- short *sout = (short *) r;
int i;
for (i = 0; i < 4; i++)
- sout[i] = s1[2 * i] - s1[2 * i + 1];
+ r[i] = i1[2 * i] - i1[2 * i + 1];
for (i = 0; i < 4; i++)
- sout[i + 4] = s2[2 * i] - s2[2 * i + 1];
+ r[i + 4] = i2[2 * i] - i2[2 * i + 1];
}
static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].h[0], &vals[i + 1].h[0], &ck.h[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_phsubw (&vals[i + 0], &vals[i + 2], &r[0]);
- ssse3_test_phsubw (&vals[i + 4], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_phsubw (&vals[i + 0].ll[0], &vals[i + 0].ll[1], &r.ll[0]);
+ ssse3_test_phsubw (&vals[i + 1].ll[0], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_phsubw128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_phsubw128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -19,23 +19,19 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_pmaddubsw (int *i1, int *i2, int *r)
+ssse3_test_pmaddubsw (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *) r = _mm_maddubs_pi16 (t1, t2);
+  *r = _mm_maddubs_pi16 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_pmaddubsw128 (int *i1, int *i2, int *r)
+ssse3_test_pmaddubsw128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *) r = _mm_maddubs_epi16 (t1, t2);
+ *r = _mm_maddubs_epi16 (*i1, *i2);
}
static short
@@ -52,19 +48,16 @@ signed_saturate_to_word(int x)
/* Routine to manually compute the results */
static void
-compute_correct_result (int *i1, int *i2, int *r)
+compute_correct_result (unsigned char *i1, signed char *i2, short *r)
{
- unsigned char *ub1 = (unsigned char *) i1;
- char *sb2 = (char *) i2;
- short *sout = (short *) r;
int t0;
int i;
for (i = 0; i < 8; i++)
{
- t0 = ((int) ub1[2 * i] * (int) sb2[2 * i] +
- (int) ub1[2 * i + 1] * (int) sb2[2 * i + 1]);
- sout[i] = signed_saturate_to_word (t0);
+ t0 = ((int) i1[2 * i] * (int) i2[2 * i] +
+ (int) i1[2 * i + 1] * (int) i2[2 * i + 1]);
+ r[i] = signed_saturate_to_word (t0);
}
}
@@ -72,25 +65,25 @@ static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].ub[0], &vals[i + 1].b[0], &ck.h[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_pmaddubsw (&vals[i + 0], &vals[i + 4], &r[0]);
- ssse3_test_pmaddubsw (&vals[i + 2], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_pmaddubsw (&vals[i + 0].ll[0], &vals[i + 1].ll[0], &r.ll[0]);
+ ssse3_test_pmaddubsw (&vals[i + 0].ll[1], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_pmaddubsw128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_pmaddubsw128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -19,39 +19,32 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_pmulhrsw (int *i1, int *i2, int *r)
+ssse3_test_pmulhrsw (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *) r = _mm_mulhrs_pi16 (t1, t2);
+ *r = _mm_mulhrs_pi16 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_pmulhrsw128 (int *i1, int *i2, int *r)
+ssse3_test_pmulhrsw128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *) r = _mm_mulhrs_epi16 (t1, t2);
+ *r = _mm_mulhrs_epi16 (*i1, *i2);
}
/* Routine to manually compute the results */
static void
-compute_correct_result (int *i1, int *i2, int *r)
+compute_correct_result (short *i1, short *i2, short *r)
{
- short *s1 = (short *) i1;
- short *s2 = (short *) i2;
- short *sout = (short *) r;
int t0;
int i;
for (i = 0; i < 8; i++)
{
- t0 = (((int) s1[i] * (int) s2[i]) >> 14) + 1;
- sout[i] = (short) (t0 >> 1);
+ t0 = (((int) i1[i] * (int) i2[i]) >> 14) + 1;
+ r[i] = (short) (t0 >> 1);
}
}
@@ -59,25 +52,25 @@ static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].h[0], &vals[i + 1].h[0], &ck.h[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_pmulhrsw (&vals[i + 0], &vals[i + 4], &r[0]);
- ssse3_test_pmulhrsw (&vals[i + 2], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_pmulhrsw (&vals[i + 0].ll[0], &vals[i + 1].ll[0], &r.ll[0]);
+ ssse3_test_pmulhrsw (&vals[i + 0].ll[1], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_pmulhrsw128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_pmulhrsw128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -19,65 +19,55 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_pshufb (int *i1, int *i2, int *r)
+ssse3_test_pshufb (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *)r = _mm_shuffle_pi8 (t1, t2);
+ *r = _mm_shuffle_pi8 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_pshufb128 (int *i1, int *i2, int *r)
+ssse3_test_pshufb128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *)r = _mm_shuffle_epi8 (t1, t2);
+ *r = _mm_shuffle_epi8 (*i1, *i2);
}
#ifndef __AVX__
/* Routine to manually compute the results */
static void
-compute_correct_result_64 (int *i1, int *i2, int *r)
+compute_correct_result_64 (signed char *i1, signed char *i2, signed char *r)
{
- char *b1 = (char *) i1;
- char *b2 = (char *) i2;
- char *bout = (char *) r;
int i;
char select;
for (i = 0; i < 16; i++)
{
- select = b2[i];
+ select = i2[i];
if (select & 0x80)
- bout[i] = 0;
+ r[i] = 0;
else if (i < 8)
- bout[i] = b1[select & 0x7];
+ r[i] = i1[select & 0x7];
else
- bout[i] = b1[8 + (select & 0x7)];
+ r[i] = i1[8 + (select & 0x7)];
}
}
#endif
static void
-compute_correct_result_128 (int *i1, int *i2, int *r)
+compute_correct_result_128 (signed char *i1, signed char *i2, signed char *r)
{
- char *b1 = (char *) i1;
- char *b2 = (char *) i2;
- char *bout = (char *) r;
int i;
char select;
for (i = 0; i < 16; i++)
{
- select = b2[i];
+ select = i2[i];
if (select & 0x80)
- bout[i] = 0;
+ r[i] = 0;
else
- bout[i] = b1[select & 0xf];
+ r[i] = i1[select & 0xf];
}
}
@@ -85,28 +75,28 @@ static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
#ifndef __AVX__
/* Manually compute the result */
- compute_correct_result_64 (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result_64 (&vals[i + 0].b[0], &vals[i + 1].b[0], &ck.b[0]);
/* Run the 64-bit tests */
- ssse3_test_pshufb (&vals[i + 0], &vals[i + 4], &r[0]);
- ssse3_test_pshufb (&vals[i + 2], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_pshufb (&vals[i + 0].ll[0], &vals[i + 1].ll[0], &r.ll[0]);
+ ssse3_test_pshufb (&vals[i + 0].ll[1], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Recompute the result for 128-bits */
- compute_correct_result_128 (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result_128 (&vals[i + 0].b[0], &vals[i + 1].b[0], &ck.b[0]);
/* Run the 128-bit tests */
- ssse3_test_pshufb128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_pshufb128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -18,66 +18,59 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_psignb (int *i1, int *i2, int *r)
+ssse3_test_psignb (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *) r = _mm_sign_pi8 (t1, t2);
+ *r = _mm_sign_pi8 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_psignb128 (int *i1, int *i2, int *r)
+ssse3_test_psignb128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *) r = _mm_sign_epi8 (t1, t2);
+ *r = _mm_sign_epi8 (*i1, *i2);
}
/* Routine to manually compute the results */
static void
-compute_correct_result (int *i1, int *i2, int *r)
+compute_correct_result (signed char *i1, signed char *i2, signed char *r)
{
- char *b1 = (char *) i1;
- char *b2 = (char *) i2;
- char *bout = (char *) r;
int i;
for (i = 0; i < 16; i++)
- if (b2[i] < 0)
- bout[i] = -b1[i];
- else if (b2[i] == 0)
- bout[i] = 0;
+ if (i2[i] < 0)
+ r[i] = -i1[i];
+ else if (i2[i] == 0)
+ r[i] = 0;
else
- bout[i] = b1[i];
+ r[i] = i1[i];
}
static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].b[0], &vals[i + 1].b[0], &ck.b[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_psignb (&vals[i + 0], &vals[i + 4], &r[0]);
- ssse3_test_psignb (&vals[i + 2], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_psignb (&vals[i + 0].ll[0], &vals[i + 1].ll[0], &r.ll[0]);
+ ssse3_test_psignb (&vals[i + 0].ll[1], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_psignb128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_psignb128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -18,23 +18,19 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_psignd (int *i1, int *i2, int *r)
+ssse3_test_psignd (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *) r = _mm_sign_pi32 (t1, t2);
+ *r = _mm_sign_pi32 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_psignd128 (int *i1, int *i2, int *r)
+ssse3_test_psignd128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *)r = _mm_sign_epi32 (t1, t2);
+ *r = _mm_sign_epi32 (*i1, *i2);
}
/* Routine to manually compute the results */
@@ -56,25 +52,25 @@ static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].w[0], &vals[i + 1].w[0], &ck.w[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_psignd (&vals[i + 0], &vals[i + 4], &r[0]);
- ssse3_test_psignd (&vals[i + 2], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_psignd (&vals[i + 0].ll[0], &vals[i + 1].ll[0], &r.ll[0]);
+ ssse3_test_psignd (&vals[i + 0].ll[1], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_psignd128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_psignd128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -18,66 +18,59 @@
#ifndef __AVX__
/* Test the 64-bit form */
static void
-ssse3_test_psignw (int *i1, int *i2, int *r)
+ssse3_test_psignw (__m64 *i1, __m64 *i2, __m64 *r)
{
- __m64 t1 = *(__m64 *) i1;
- __m64 t2 = *(__m64 *) i2;
- *(__m64 *) r = _mm_sign_pi16 (t1, t2);
+ *r = _mm_sign_pi16 (*i1, *i2);
_mm_empty ();
}
#endif
/* Test the 128-bit form */
static void
-ssse3_test_psignw128 (int *i1, int *i2, int *r)
+ssse3_test_psignw128 (__m128i *i1, __m128i *i2, __m128i *r)
{
/* Assumes incoming pointers are 16-byte aligned */
- __m128i t1 = *(__m128i *) i1;
- __m128i t2 = *(__m128i *) i2;
- *(__m128i *) r = _mm_sign_epi16 (t1, t2);
+ *r = _mm_sign_epi16 (*i1, *i2);
}
/* Routine to manually compute the results */
static void
-compute_correct_result (int *i1, int *i2, int *r)
+compute_correct_result (short *i1, short *i2, short *r)
{
- short *s1 = (short *) i1;
- short *s2 = (short *) i2;
- short *sout = (short *) r;
int i;
for (i = 0; i < 8; i++)
- if (s2[i] < 0)
- sout[i] = -s1[i];
- else if (s2[i] == 0)
- sout[i] = 0;
+ if (i2[i] < 0)
+ r[i] = -i1[i];
+ else if (i2[i] == 0)
+ r[i] = 0;
else
- sout[i] = s1[i];
+ r[i] = i1[i];
}
static void
TEST (void)
{
int i;
- int r [4] __attribute__ ((aligned(16)));
- int ck [4];
+ union data r __attribute__ ((aligned(16)));
+ union data ck;
int fail = 0;
- for (i = 0; i < 256; i += 8)
+ for (i = 0; i < ARRAY_SIZE (vals) - 1; i ++)
{
/* Manually compute the result */
- compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+ compute_correct_result (&vals[i + 0].h[0], &vals[i + 1].h[0], &ck.h[0]);
#ifndef __AVX__
/* Run the 64-bit tests */
- ssse3_test_psignw (&vals[i + 0], &vals[i + 4], &r[0]);
- ssse3_test_psignw (&vals[i + 2], &vals[i + 6], &r[2]);
- fail += chk_128 (ck, r);
+ ssse3_test_psignw (&vals[i + 0].ll[0], &vals[i + 1].ll[0], &r.ll[0]);
+ ssse3_test_psignw (&vals[i + 0].ll[1], &vals[i + 1].ll[1], &r.ll[1]);
+ fail += chk_128 (ck.m[0], r.m[0]);
#endif
/* Run the 128-bit tests */
- ssse3_test_psignw128 (&vals[i + 0], &vals[i + 4], r);
- fail += chk_128 (ck, r);
+ ssse3_test_psignw128 (&vals[i + 0].m[0], &vals[i + 1].m[0], &r.m[0]);
+ fail += chk_128 (ck.m[0], r.m[0]);
}
if (fail != 0)
===================================================================
@@ -1,60 +1,83 @@
+#include <tmmintrin.h>
+
/* Routine to check correctness of the results */
static int
-chk_128 (int *v1, int *v2)
+chk_128 (__m128i v1, __m128i v2)
{
- int i;
- int n_fails = 0;
-
- for (i = 0; i < 4; i++)
- if (v1[i] != v2[i])
- n_fails += 1;
-
- return n_fails;
+ return (v1[0] != v2[0]) || (v1[1] != v2[1]);
}
-static int vals [256] __attribute__ ((aligned(16))) =
+static union data {
+ int w[4];
+ signed char b[16];
+ unsigned char ub[16];
+ short h[8];
+ unsigned long long ll[2];
+ __m128i m[1];
+} vals[] __attribute__ ((aligned(16))) =
{
- 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x5be800ee, 0x4f2d7b15,
- 0x409d9291, 0xdd95f27f, 0x423986e3, 0x21a4d2cd, 0xa7056d84, 0x4f4e5a3b,
- 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
- 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
- 0x73ef0244, 0xcd836329, 0x847f634f, 0xa7e3abcf, 0xb4c14764, 0x1ef42c06,
- 0x504f29ac, 0x4ae7ca73, 0xaddde3c9, 0xf63ded2e, 0xa5d3553d, 0xa52ae05f,
- 0x6fd3c83a, 0x7dc2b300, 0x76b05de7, 0xea8ebae5, 0x549568dd, 0x172f0358,
- 0x917eadf0, 0x796fb0a7, 0xb39381af, 0xd0591d61, 0x731d2f17, 0xbc4b6f5d,
- 0x8ec664c2, 0x3c199c19, 0x9c81db12, 0x6d85913b, 0x486107a9, 0xab6f4b26,
- 0x5630d37c, 0x20836e85, 0x40d4e746, 0xdfbaba36, 0xbeacaa69, 0xb3c84083,
- 0x8a688eb4, 0x08cde481, 0x66e7a190, 0x74ee1639, 0xb3942a19, 0xe0c40471,
- 0x9b789489, 0x9751207a, 0x543a1524, 0x41da7ad6, 0x614bb563, 0xf86f57b1,
- 0x69e62199, 0x2150cb12, 0x9ed74062, 0x429471f4, 0xad28502b, 0xf2e2d4d5,
- 0x45b6ce09, 0xaaa5e649, 0xb46da484, 0x0a637515, 0xae7a3212, 0x5afc784c,
- 0x776cfbbe, 0x9c542bb2, 0x64193aa8, 0x16e8a655, 0x4e3d2f92, 0xe05d7b72,
- 0x89854ebc, 0x8c318814, 0xb81e76e0, 0x3f2625f5, 0x61b44852, 0x5209d7ad,
- 0x842fe317, 0xd3cfcca1, 0x8d287cc7, 0x80f0c9a8, 0x4215f4e5, 0x563993d6,
- 0x5d627433, 0xc4449e35, 0x5b4fe009, 0x3ef92286, 0xacbc8927, 0x549ab870,
- 0x9ac5b959, 0xed8f1c91, 0x7ecf02cd, 0x989c0e8b, 0xa31d6918, 0x1dc2bcc1,
- 0x99d3f3cc, 0x6857acc8, 0x45d7324a, 0xaebdf2e6, 0x7af2f2ae, 0x09716f73,
- 0x7816e694, 0xc65493c0, 0x9f7e87bc, 0xaa96cd40, 0xbfb5bfc6, 0x01a2cce7,
- 0x5f1d8c46, 0x45303efb, 0xb24607c3, 0xef2009a7, 0xba873753, 0xbefb14bc,
- 0x74e53cd3, 0x70124708, 0x6eb4bdbd, 0xf3ba5e43, 0x4c94085f, 0x0c03e7e0,
- 0x9a084931, 0x62735424, 0xaeee77c5, 0xdb34f90f, 0x6860cbdd, 0xaf77cf9f,
- 0x95b28158, 0x23bd70d7, 0x9fbc3d88, 0x742e659e, 0x53bcfb48, 0xb8a63f6c,
- 0x4dcf3373, 0x2b168627, 0x4fe20745, 0xd0af5e94, 0x22514e6a, 0xb8ef25c2,
- 0x89ec781a, 0x13d9002b, 0x6d724500, 0x7fdbf63f, 0xb0e9ced5, 0xf919e0f3,
- 0x00fef203, 0x8905d47a, 0x434e7517, 0x4aef8e2c, 0x689f51e8, 0xe513b7c3,
- 0x72bbc5d2, 0x3a222f74, 0x05c3a0f9, 0xd5489d82, 0xb41fbe83, 0xec5d305f,
- 0x5ea02b0b, 0xb176065b, 0xa8eb404e, 0x80349117, 0x210fd49e, 0x43898d0e,
- 0x6c151b9c, 0x8742df18, 0x7b64de73, 0x1dbf52b2, 0x55c9cb19, 0xeb841f10,
- 0x10b8ae76, 0x0764ecb6, 0xb7479018, 0x2672cb3f, 0x7ac9ac90, 0x4be5332c,
- 0x8f1a0615, 0x4efb7a77, 0x16551a85, 0xdb2c3d66, 0x49179c07, 0x5dc4657e,
- 0x5e76907e, 0xd7486a9c, 0x445204a4, 0x65cdc426, 0x33f86ded, 0xcba95dda,
- 0x83351f16, 0xfedefad9, 0x639b620f, 0x86896a64, 0xba4099ba, 0x965f4a21,
- 0x1247154f, 0x25604c42, 0x5862d692, 0xb1e9149e, 0x612516a5, 0x02c49bf8,
- 0x631212bf, 0x9f69f54e, 0x168b63b0, 0x310a25ba, 0xa42a59cd, 0x084f0af9,
- 0x44a06cec, 0x5c0cda40, 0xb932d721, 0x7c42bb0d, 0x213cd3f0, 0xedc7f5a4,
- 0x7fb85859, 0x6b3da5ea, 0x61cd591e, 0xe8e9aa08, 0x4361fc34, 0x53d40d2a,
- 0x0511ad1b, 0xf996b44c, 0xb5ead756, 0xc022138d, 0x6172adf1, 0xa4a0a3b4,
- 0x8c2977b8, 0xa8e482ed, 0x04fcdd6b, 0x3f7b85d4, 0x4fca1e46, 0xa392ddca,
- 0x569fc791, 0x346a706c, 0x543bf3eb, 0x895b3cde, 0x2146bb80, 0x26b3c168,
- 0x929998db, 0x1ea472c9, 0x7207b36b, 0x6a8f10d4
+ { { 0x00000000, 0x00000000, 0x00000000, 0x00000000 } },
+ { { 0x5be800ee, 0x4f2d7b15, 0x409d9291, 0xdd95f27f } },
+ { { 0x423986e3, 0x21a4d2cd, 0xa7056d84, 0x4f4e5a3b } },
+ { { 0x00000000, 0x00000000, 0x00000000, 0x00000000 } },
+ { { 0x00000000, 0x00000000, 0x00000000, 0x00000000 } },
+ { { 0x00000000, 0x00000000, 0x00000000, 0x00000000 } },
+ { { 0x73ef0244, 0xcd836329, 0x847f634f, 0xa7e3abcf } },
+ { { 0xb4c14764, 0x1ef42c06, 0x504f29ac, 0x4ae7ca73 } },
+ { { 0xaddde3c9, 0xf63ded2e, 0xa5d3553d, 0xa52ae05f } },
+ { { 0x6fd3c83a, 0x7dc2b300, 0x76b05de7, 0xea8ebae5 } },
+ { { 0x549568dd, 0x172f0358, 0x917eadf0, 0x796fb0a7 } },
+ { { 0xb39381af, 0xd0591d61, 0x731d2f17, 0xbc4b6f5d } },
+ { { 0x8ec664c2, 0x3c199c19, 0x9c81db12, 0x6d85913b } },
+ { { 0x486107a9, 0xab6f4b26, 0x5630d37c, 0x20836e85 } },
+ { { 0x40d4e746, 0xdfbaba36, 0xbeacaa69, 0xb3c84083 } },
+ { { 0x8a688eb4, 0x08cde481, 0x66e7a190, 0x74ee1639 } },
+ { { 0xb3942a19, 0xe0c40471, 0x9b789489, 0x9751207a } },
+ { { 0x543a1524, 0x41da7ad6, 0x614bb563, 0xf86f57b1 } },
+ { { 0x69e62199, 0x2150cb12, 0x9ed74062, 0x429471f4 } },
+ { { 0xad28502b, 0xf2e2d4d5, 0x45b6ce09, 0xaaa5e649 } },
+ { { 0xb46da484, 0x0a637515, 0xae7a3212, 0x5afc784c } },
+ { { 0x776cfbbe, 0x9c542bb2, 0x64193aa8, 0x16e8a655 } },
+ { { 0x4e3d2f92, 0xe05d7b72, 0x89854ebc, 0x8c318814 } },
+ { { 0xb81e76e0, 0x3f2625f5, 0x61b44852, 0x5209d7ad } },
+ { { 0x842fe317, 0xd3cfcca1, 0x8d287cc7, 0x80f0c9a8 } },
+ { { 0x4215f4e5, 0x563993d6, 0x5d627433, 0xc4449e35 } },
+ { { 0x5b4fe009, 0x3ef92286, 0xacbc8927, 0x549ab870 } },
+ { { 0x9ac5b959, 0xed8f1c91, 0x7ecf02cd, 0x989c0e8b } },
+ { { 0xa31d6918, 0x1dc2bcc1, 0x99d3f3cc, 0x6857acc8 } },
+ { { 0x45d7324a, 0xaebdf2e6, 0x7af2f2ae, 0x09716f73 } },
+ { { 0x7816e694, 0xc65493c0, 0x9f7e87bc, 0xaa96cd40 } },
+ { { 0xbfb5bfc6, 0x01a2cce7, 0x5f1d8c46, 0x45303efb } },
+ { { 0xb24607c3, 0xef2009a7, 0xba873753, 0xbefb14bc } },
+ { { 0x74e53cd3, 0x70124708, 0x6eb4bdbd, 0xf3ba5e43 } },
+ { { 0x4c94085f, 0x0c03e7e0, 0x9a084931, 0x62735424 } },
+ { { 0xaeee77c5, 0xdb34f90f, 0x6860cbdd, 0xaf77cf9f } },
+ { { 0x95b28158, 0x23bd70d7, 0x9fbc3d88, 0x742e659e } },
+ { { 0x53bcfb48, 0xb8a63f6c, 0x4dcf3373, 0x2b168627 } },
+ { { 0x4fe20745, 0xd0af5e94, 0x22514e6a, 0xb8ef25c2 } },
+ { { 0x89ec781a, 0x13d9002b, 0x6d724500, 0x7fdbf63f } },
+ { { 0xb0e9ced5, 0xf919e0f3, 0x00fef203, 0x8905d47a } },
+ { { 0x434e7517, 0x4aef8e2c, 0x689f51e8, 0xe513b7c3 } },
+ { { 0x72bbc5d2, 0x3a222f74, 0x05c3a0f9, 0xd5489d82 } },
+ { { 0xb41fbe83, 0xec5d305f, 0x5ea02b0b, 0xb176065b } },
+ { { 0xa8eb404e, 0x80349117, 0x210fd49e, 0x43898d0e } },
+ { { 0x6c151b9c, 0x8742df18, 0x7b64de73, 0x1dbf52b2 } },
+ { { 0x55c9cb19, 0xeb841f10, 0x10b8ae76, 0x0764ecb6 } },
+ { { 0xb7479018, 0x2672cb3f, 0x7ac9ac90, 0x4be5332c } },
+ { { 0x8f1a0615, 0x4efb7a77, 0x16551a85, 0xdb2c3d66 } },
+ { { 0x49179c07, 0x5dc4657e, 0x5e76907e, 0xd7486a9c } },
+ { { 0x445204a4, 0x65cdc426, 0x33f86ded, 0xcba95dda } },
+ { { 0x83351f16, 0xfedefad9, 0x639b620f, 0x86896a64 } },
+ { { 0xba4099ba, 0x965f4a21, 0x1247154f, 0x25604c42 } },
+ { { 0x5862d692, 0xb1e9149e, 0x612516a5, 0x02c49bf8 } },
+ { { 0x631212bf, 0x9f69f54e, 0x168b63b0, 0x310a25ba } },
+ { { 0xa42a59cd, 0x084f0af9, 0x44a06cec, 0x5c0cda40 } },
+ { { 0xb932d721, 0x7c42bb0d, 0x213cd3f0, 0xedc7f5a4 } },
+ { { 0x7fb85859, 0x6b3da5ea, 0x61cd591e, 0xe8e9aa08 } },
+ { { 0x4361fc34, 0x53d40d2a, 0x0511ad1b, 0xf996b44c } },
+ { { 0xb5ead756, 0xc022138d, 0x6172adf1, 0xa4a0a3b4 } },
+ { { 0x8c2977b8, 0xa8e482ed, 0x04fcdd6b, 0x3f7b85d4 } },
+ { { 0x4fca1e46, 0xa392ddca, 0x569fc791, 0x346a706c } },
+ { { 0x543bf3eb, 0x895b3cde, 0x2146bb80, 0x26b3c168 } },
+ { { 0x929998db, 0x1ea472c9, 0x7207b36b, 0x6a8f10d4 } }
};