diff mbox series

[v2,rs6000,4/4] Add compatible implementations of x86 SSSE3 intrinsics <tmmintrin.h>

Message ID d6966b40-3bed-ca7c-0542-5792fd0c8ea3@us.ibm.com
State New
Headers show
Series [v2,rs6000,1/4] Fixes for x86 intrinsics on POWER 32bit | expand

Commit Message

Paul A. Clarke Oct. 25, 2018, 7:08 p.m. UTC
This is part 2/2 for contributing PPC64LE support for X86 SSSE3
intrinsics. This patch includes testsuite/gcc.target tests for the
intrinsics defined in tmmintrin.h, copied from gcc.target/i386.

Bootstrapped and tested on Linux POWER8 LE, POWER8 BE (64 & 32), and POWER7.

OK for trunk?

[gcc/testsuite]

2018-10-25  Paul A. Clarke  <pc@us.ibm.com>

	* gcc.target/powerpc/ssse3-check.h: New file.
	* gcc.target/powerpc/ssse3-vals.h: New file.
	* gcc.target/powerpc/ssse3-pabsb.c: New file.
	* gcc.target/powerpc/ssse3-pabsd.c: New file.
	* gcc.target/powerpc/ssse3-pabsw.c: New file.
	* gcc.target/powerpc/ssse3-palignr.c: New file.
	* gcc.target/powerpc/ssse3-phaddd.c: New file.
	* gcc.target/powerpc/ssse3-phaddsw.c: New file.
	* gcc.target/powerpc/ssse3-phaddw.c: New file.
	* gcc.target/powerpc/ssse3-phsubd.c: New file.
	* gcc.target/powerpc/ssse3-phsubsw.c: New file.
	* gcc.target/powerpc/ssse3-phsubw.c: New file.
	* gcc.target/powerpc/ssse3-pmaddubsw.c: New file.
	* gcc.target/powerpc/ssse3-pmulhrsw.c: New file.
	* gcc.target/powerpc/ssse3-pshufb.c: New file.
	* gcc.target/powerpc/ssse3-psignb.c: New file.
	* gcc.target/powerpc/ssse3-psignd.c: New file.
	* gcc.target/powerpc/ssse3-psignw.c: New file.

Comments

Segher Boessenkool Oct. 26, 2018, 6:02 p.m. UTC | #1
On Thu, Oct 25, 2018 at 02:08:03PM -0500, Paul Clarke wrote:
> This is part 2/2 for contributing PPC64LE support for X86 SSSE3
> intrinsics. This patch includes testsuite/gcc.target tests for the
> intrinsics defined in tmmintrin.h, copied from gcc.target/i386.
> 
> Bootstrapped and tested on Linux POWER8 LE, POWER8 BE (64 & 32), and POWER7.
> 
> OK for trunk?

Yes please.  Thanks!


Segher
Bill Schmidt Oct. 26, 2018, 7:02 p.m. UTC | #2
On 10/25/18 2:08 PM, Paul Clarke wrote:
> This is part 2/2 for contributing PPC64LE support for X86 SSSE3
> intrinsics. This patch includes testsuite/gcc.target tests for the
> intrinsics defined in tmmintrin.h, copied from gcc.target/i386.
>
> Bootstrapped and tested on Linux POWER8 LE, POWER8 BE (64 & 32), and POWER7.
>
> OK for trunk?
>
> [gcc/testsuite]
>
> 2018-10-25  Paul A. Clarke  <pc@us.ibm.com>
>
> 	* gcc.target/powerpc/ssse3-check.h: New file.
> 	* gcc.target/powerpc/ssse3-vals.h: New file.
> 	* gcc.target/powerpc/ssse3-pabsb.c: New file.
> 	* gcc.target/powerpc/ssse3-pabsd.c: New file.
> 	* gcc.target/powerpc/ssse3-pabsw.c: New file.
> 	* gcc.target/powerpc/ssse3-palignr.c: New file.
> 	* gcc.target/powerpc/ssse3-phaddd.c: New file.
> 	* gcc.target/powerpc/ssse3-phaddsw.c: New file.
> 	* gcc.target/powerpc/ssse3-phaddw.c: New file.
> 	* gcc.target/powerpc/ssse3-phsubd.c: New file.
> 	* gcc.target/powerpc/ssse3-phsubsw.c: New file.
> 	* gcc.target/powerpc/ssse3-phsubw.c: New file.
> 	* gcc.target/powerpc/ssse3-pmaddubsw.c: New file.
> 	* gcc.target/powerpc/ssse3-pmulhrsw.c: New file.
> 	* gcc.target/powerpc/ssse3-pshufb.c: New file.
> 	* gcc.target/powerpc/ssse3-psignb.c: New file.
> 	* gcc.target/powerpc/ssse3-psignd.c: New file.
> 	* gcc.target/powerpc/ssse3-psignw.c: New file.
>
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-check.h
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h	(working copy)
> @@ -0,0 +1,43 @@
> +#include <stdio.h>
> +#include <stdlib.h>
> +
> +#include "m128-check.h"
> +
> +/* define DEBUG replace abort with printf on error.  */

One nit -- this comment appears to be incorrect, as the only place DEBUG is used,
you don't have abort() anywhere.

(I have a patch under review that questions why we would replace abort() rather
than supplement it with printf, anyway...)

Thanks,
Bill

> +//#define DEBUG 1
> +
> +#define TEST ssse3_test
> +
> +static void ssse3_test (void);
> +
> +static void
> +__attribute__ ((noinline))
> +do_test (void)
> +{
> +  ssse3_test ();
> +}
> +
> +int
> +main ()
> +{
> +#ifdef __BUILTIN_CPU_SUPPORTS__
> +  /* Most SSE intrinsic operations can be implemented via VMX
> +     instructions, but some operations may be faster / simpler
> +     using the POWER8 VSX instructions.  This is especially true
> +     when we are transferring / converting to / from __m64 types.
> +     The direct register transfer instructions from POWER8 are
> +     especially important.  So we test for arch_2_07.  */
> +  if (__builtin_cpu_supports ("arch_2_07"))
> +    {
> +      do_test ();
> +#ifdef DEBUG
> +      printf ("PASSED\n");
> +#endif
> +    }
> +#ifdef DEBUG
> +  else
> +    printf ("SKIPPED\n");
> +#endif
> +#endif /* __BUILTIN_CPU_SUPPORTS__ */
> +  return 0;
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c	(working copy)
> @@ -0,0 +1,80 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pabsb (int *i1, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  *(__m64 *) r = _mm_abs_pi8 (t1);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pabsb128 (int *i1, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  *(__m128i *) r = _mm_abs_epi8 (t1);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *r)
> +{
> +  char *b1 = (char *) i1;
> +  char *bout = (char *) r;
> +  int i;
> +
> +  for (i = 0; i < 16; i++)
> +    if (b1[i] < 0)
> +      bout[i] = -b1[i];
> +    else
> +      bout[i] = b1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 4)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result(&vals[i + 0], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_pabsb (&vals[i + 0], &r[0]);
> +      ssse3_test_pabsb (&vals[i + 2], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pabsb128 (&vals[i + 0], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c	(working copy)
> @@ -0,0 +1,79 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pabsd (int *i1, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  *(__m64 *) r = _mm_abs_pi32 (t1);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pabsd128 (int *i1, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  *(__m128i *) r = _mm_abs_epi32 (t1);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *r)
> +{
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    if (i1[i] < 0)
> +      r[i] = -i1[i];
> +    else
> +      r[i] = i1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 4)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result(&vals[i + 0], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_pabsd (&vals[i + 0], &r[0]);
> +      ssse3_test_pabsd (&vals[i + 2], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pabsd128 (&vals[i + 0], r);
> +      fail += chk_128(ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c	(working copy)
> @@ -0,0 +1,81 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pabsw (int *i1, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  *(__m64 *) r = _mm_abs_pi16 (t1);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pabsw128 (int *i1, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  *(__m128i *) r = _mm_abs_epi16 (t1);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    if (s1[i] < 0)
> +      sout[i] = -s1[i];
> +    else
> +      sout[i] = s1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 4)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_pabsw (&vals[i + 0], &r[0]);
> +      ssse3_test_pabsw (&vals[i + 2], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pabsw128 (&vals[i + 0], r);
> +      fail += chk_128 (ck, r);
> +    }
> +  
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c	(working copy)
> @@ -0,0 +1,279 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +#include <string.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +
> +  switch (imm)
> +    {
> +    case 0:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0);
> +      break;
> +    case 1:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1);
> +      break;
> +    case 2:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2);
> +      break;
> +    case 3:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3);
> +      break;
> +    case 4:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4);
> +      break;
> +    case 5:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5);
> +      break;
> +    case 6:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6);
> +      break;
> +    case 7:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7);
> +      break;
> +    case 8:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8);
> +      break;
> +    case 9:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9);
> +      break;
> +    case 10:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10);
> +      break;
> +    case 11:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11);
> +      break;
> +    case 12:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12);
> +      break;
> +    case 13:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13);
> +      break;
> +    case 14:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14);
> +      break;
> +    case 15:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15);
> +      break;
> +    default:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
> +      break;
> +    }
> +
> +   _mm_empty();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +
> +  switch (imm)
> +    {
> +    case 0:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);
> +      break;
> +    case 1:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);
> +      break;
> +    case 2:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);
> +      break;
> +    case 3:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);
> +      break;
> +    case 4:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);
> +      break;
> +    case 5:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);
> +      break;
> +    case 6:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);
> +      break;
> +    case 7:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);
> +      break;
> +    case 8:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);
> +      break;
> +    case 9:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);
> +      break;
> +    case 10:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10);
> +      break;
> +    case 11:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11);
> +      break;
> +    case 12:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12);
> +      break;
> +    case 13:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13);
> +      break;
> +    case 14:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14);
> +      break;
> +    case 15:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15);
> +      break;
> +    case 16:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16);
> +      break;
> +    case 17:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17);
> +      break;
> +    case 18:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18);
> +      break;
> +    case 19:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19);
> +      break;
> +    case 20:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20);
> +      break;
> +    case 21:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21);
> +      break;
> +    case 22:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22);
> +      break;
> +    case 23:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23);
> +      break;
> +    case 24:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24);
> +      break;
> +    case 25:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25);
> +      break;
> +    case 26:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26);
> +      break;
> +    case 27:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27);
> +      break;
> +    case 28:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28);
> +      break;
> +    case 29:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29);
> +      break;
> +    case 30:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30);
> +      break;
> +    case 31:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31);
> +      break;
> +    default:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
> +      break;
> +    }
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
> +{
> +  char buf [32];
> +  char *bout = (char *) r;
> +  int i;
> +
> +  memcpy (&buf[0], i2, 16);
> +  memcpy (&buf[16], i1, 16);
> +
> +  for (i = 0; i < 16; i++)
> +    if (imm >= 32 || imm + i >= 32)
> +      bout[i] = 0;
> +    else
> +      bout[i] = buf[imm + i];
> +}
> +
> +#ifndef __AVX__
> +static void
> +compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
> +{
> +  char buf [16];
> +  char *bout = (char *)r;
> +  int i;
> +
> +  /* Handle the first half */
> +  memcpy (&buf[0], i2, 8);
> +  memcpy (&buf[8], i1, 8);
> +
> +  for (i = 0; i < 8; i++)
> +    if (imm >= 16 || imm + i >= 16)
> +      bout[i] = 0;
> +    else
> +      bout[i] = buf[imm + i];
> +
> +  /* Handle the second half */
> +  memcpy (&buf[0], &i2[2], 8);
> +  memcpy (&buf[8], &i1[2], 8);
> +
> +  for (i = 0; i < 8; i++)
> +    if (imm >= 16 || imm + i >= 16)
> +      bout[i + 8] = 0;
> +    else
> +      bout[i + 8] = buf[imm + i];
> +}
> +#endif
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  unsigned int imm;
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    for (imm = 0; imm < 100; imm++)
> +      {
> +#ifndef __AVX__
> +	/* Manually compute the result */
> +	compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
> +
> +	/* Run the 64-bit tests */
> +	ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
> +	ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
> +	fail += chk_128 (ck, r);
> +#endif
> +
> +	/* Recompute the results for 128-bits */
> +	compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
> +
> +	/* Run the 128-bit tests */
> +	ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
> +	fail += chk_128 (ck, r);
> +      }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c	(working copy)
> @@ -0,0 +1,81 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phaddd (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_hadd_pi32 (t1, t2);
> +  _mm_empty();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phaddd128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_hadd_epi32 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result(int *i1, int *i2, int *r)
> +{
> +  int i;
> +
> +  for (i = 0; i < 2; i++)
> +    r[i] = i1[2 * i] + i1[2 * i + 1];
> +  for (i = 0; i < 2; i++)
> +    r[i + 2] = i2[2 * i] + i2[2 * i + 1];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phaddd (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phaddd (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phaddd128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c	(working copy)
> @@ -0,0 +1,95 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phaddsw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_hadds_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phaddsw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> + *(__m128i *) r = _mm_hadds_epi16 (t1, t2);
> +}
> +
> +static short
> +signed_saturate_to_word (int x)
> +{
> +  if (x > (int) 0x7fff)
> +    return 0x7fff;
> +
> +  if (x < (int) 0xffff8000)
> +    return 0x8000;
> +
> +  return (short) x;
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i] = signed_saturate_to_word(s1[2 * i] + s1[2 * i + 1]);
> +  for (i = 0; i < 4; i++)
> +    sout[i + 4] = signed_saturate_to_word(s2[2 * i] + s2[2 * i + 1]);
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phaddsw (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phaddsw (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phaddsw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c	(working copy)
> @@ -0,0 +1,84 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phaddw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_hadd_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phaddw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_hadd_epi16 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result(int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i] = s1[2 * i] + s1[2 * i + 1];
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i + 4] = s2[2 * i] + s2[2 * i + 1];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phaddw (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phaddw (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phaddw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c	(working copy)
> @@ -0,0 +1,80 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phsubd (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_hsub_pi32(t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phsubd128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_hsub_epi32 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  int i;
> +
> +  for (i = 0; i < 2; i++)
> +    r[i] = i1[2 * i] - i1[2 * i + 1];
> +  for (i = 0; i < 2; i++)
> +    r[i + 2] = i2[2 * i] - i2[2 * i + 1];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phsubd (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phsubd (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phsubd128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c	(working copy)
> @@ -0,0 +1,98 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phsubsw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +
> +  *(__m64 *) r = _mm_hsubs_pi16 (t1, t2);
> +
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phsubsw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_hsubs_epi16 (t1, t2);
> +}
> +
> +static short
> +signed_saturate_to_word (int x)
> +{
> +  if (x > (int )0x7fff)
> +    return 0x7fff;
> +
> +  if (x < (int) 0xffff8000)
> +    return 0x8000;
> +
> +  return (short)x;
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i] = signed_saturate_to_word (s1[2 * i] - s1[2 * i + 1]);
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i + 4] = signed_saturate_to_word (s2[2 * i] - s2[2 * i + 1]);
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phsubsw (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phsubsw (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phsubsw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c
> new file mode 100644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c	(working copy)
> @@ -0,0 +1,83 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phsubw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_hsub_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phsubw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +
> +  *(__m128i *) r = _mm_hsub_epi16 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i] = s1[2 * i] - s1[2 * i + 1];
> +  for (i = 0; i < 4; i++)
> +    sout[i + 4] = s2[2 * i] - s2[2 * i + 1];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phsubw (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phsubw (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phsubw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c
> new file mode 10644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c	(working copy)
> @@ -0,0 +1,98 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pmaddubsw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_maddubs_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pmaddubsw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_maddubs_epi16 (t1, t2);
> +}
> +
> +static short
> +signed_saturate_to_word (int x)
> +{
> +  /* Clamp X to [-0x8000, 0x7fff]; both bounds are representable in short.  */
> +  if (x > 0x7fff)
> +    return 0x7fff;
> +  if (x < -0x8000)
> +    return -0x8000;
> +
> +  return (short) x;
> +}
> +
> +/* Reference PMADDUBSW: saturated sums of adjacent u8 (i1) * s8 (i2) products.  */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  unsigned char *ub1 = (unsigned char *) i1;
> +  signed char *sb2 = (signed char *) i2;  /* Plain char may be unsigned.  */
> +  short *sout = (short *) r;
> +  int t0;
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      t0 = ((int) ub1[2 * i] * (int) sb2[2 * i] +
> +	    (int) ub1[2 * i + 1] * (int) sb2[2 * i + 1]);
> +      sout[i] = signed_saturate_to_word (t0);
> +    }
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_pmaddubsw (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_pmaddubsw (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pmaddubsw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c
> new file mode 10644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c	(working copy)
> @@ -0,0 +1,85 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pmulhrsw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_mulhrs_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pmulhrsw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_mulhrs_epi16 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int t0;
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      t0 = (((int) s1[i] * (int) s2[i]) >> 14) + 1;
> +      sout[i] = (short) (t0 >> 1);
> +    }
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_pmulhrsw (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_pmulhrsw (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pmulhrsw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c
> new file mode 10644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c	(working copy)
> @@ -0,0 +1,114 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pshufb (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *)r = _mm_shuffle_pi8 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pshufb128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *)r = _mm_shuffle_epi8 (t1, t2);
> +}
> +
> +#ifndef __AVX__
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result_64 (int *i1, int *i2, int *r)
> +{
> +  char *b1 = (char *) i1;
> +  char *b2 = (char *) i2;
> +  char *bout = (char *) r;
> +  int i;
> +  char select;
> +
> +  for (i = 0; i < 16; i++)
> +    {
> +      select = b2[i];
> +      if (select & 0x80)
> +	bout[i] = 0;
> +      else if (i < 8)
> +	bout[i] = b1[select & 0x7];
> +      else
> +	bout[i] = b1[8 + (select & 0x7)];
> +    }
> +}
> +#endif
> +
> +static void
> +compute_correct_result_128 (int *i1, int *i2, int *r)
> +{
> +  char *b1 = (char *) i1;
> +  char *b2 = (char *) i2;
> +  char *bout = (char *) r;
> +  int i;
> +  char select;
> +
> +  for (i = 0; i < 16; i++)
> +    {
> +      select = b2[i];
> +      if (select & 0x80)
> +	bout[i] = 0;
> +      else
> +	bout[i] = b1[select & 0xf];
> +    }
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +#ifndef __AVX__
> +      /* Manually compute the result */
> +      compute_correct_result_64 (&vals[i + 0], &vals[i + 4], ck);
> +
> +      /* Run the 64-bit tests */
> +      ssse3_test_pshufb (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_pshufb (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Recompute the result for 128-bits */
> +      compute_correct_result_128 (&vals[i + 0], &vals[i + 4], ck);
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pshufb128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c
> new file mode 10644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c	(working copy)
> @@ -0,0 +1,85 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_psignb (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_sign_pi8 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_psignb128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_sign_epi8 (t1, t2);
> +}
> +
> +/* Reference PSIGNB: negate, zero or keep each i1 byte per the sign of i2's.  */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  signed char *b1 = (signed char *) i1;
> +  signed char *b2 = (signed char *) i2;  /* Plain char may be unsigned.  */
> +  char *bout = (char *) r;
> +  int i;
> +
> +  for (i = 0; i < 16; i++)
> +    if (b2[i] < 0)
> +      bout[i] = -b1[i];
> +    else if (b2[i] == 0)
> +      bout[i] = 0;
> +    else
> +      bout[i] = b1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_psignb (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_psignb (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_psignb128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c
> new file mode 10644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c	(working copy)
> @@ -0,0 +1,82 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_psignd (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_sign_pi32 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_psignd128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *)r = _mm_sign_epi32 (t1, t2);
> +}
> +
> +/* Reference PSIGND: negate, zero or keep each i1 int per the sign of i2's.  */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    if (i2[i] < 0)
> +      r[i] = (int) (0u - (unsigned int) i1[i]);  /* Wraps on INT_MIN.  */
> +    else if (i2[i] == 0)
> +      r[i] = 0;
> +    else
> +      r[i] = i1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_psignd (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_psignd (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_psignd128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c
> new file mode 10644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c	(working copy)
> @@ -0,0 +1,85 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_psignw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_sign_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_psignw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> + *(__m128i *) r = _mm_sign_epi16 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    if (s2[i] < 0)
> +      sout[i] = -s1[i];
> +    else if (s2[i] == 0)
> +      sout[i] = 0;
> +    else
> +      sout[i] = s1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_psignw (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_psignw (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_psignw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-vals.h
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h
> new file mode 10644
> --- /dev/null	(revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h	(working copy)
> @@ -0,0 +1,60 @@
> +/* Routine to check correctness of the results */
> +static int
> +chk_128 (int *v1, int *v2)
> +{
> +  int i;
> +  int n_fails = 0;
> +
> +  for (i = 0; i < 4; i++)
> +    if (v1[i] != v2[i])
> +      n_fails += 1;
> +
> +  return n_fails;
> +}
> +
> +static int vals [256] __attribute__ ((aligned(16))) =
> +{
> +  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x5be800ee, 0x4f2d7b15,
> +  0x409d9291, 0xdd95f27f, 0x423986e3, 0x21a4d2cd, 0xa7056d84, 0x4f4e5a3b,
> +  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
> +  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
> +  0x73ef0244, 0xcd836329, 0x847f634f, 0xa7e3abcf, 0xb4c14764, 0x1ef42c06,
> +  0x504f29ac, 0x4ae7ca73, 0xaddde3c9, 0xf63ded2e, 0xa5d3553d, 0xa52ae05f,
> +  0x6fd3c83a, 0x7dc2b300, 0x76b05de7, 0xea8ebae5, 0x549568dd, 0x172f0358,
> +  0x917eadf0, 0x796fb0a7, 0xb39381af, 0xd0591d61, 0x731d2f17, 0xbc4b6f5d,
> +  0x8ec664c2, 0x3c199c19, 0x9c81db12, 0x6d85913b, 0x486107a9, 0xab6f4b26,
> +  0x5630d37c, 0x20836e85, 0x40d4e746, 0xdfbaba36, 0xbeacaa69, 0xb3c84083,
> +  0x8a688eb4, 0x08cde481, 0x66e7a190, 0x74ee1639, 0xb3942a19, 0xe0c40471,
> +  0x9b789489, 0x9751207a, 0x543a1524, 0x41da7ad6, 0x614bb563, 0xf86f57b1,
> +  0x69e62199, 0x2150cb12, 0x9ed74062, 0x429471f4, 0xad28502b, 0xf2e2d4d5,
> +  0x45b6ce09, 0xaaa5e649, 0xb46da484, 0x0a637515, 0xae7a3212, 0x5afc784c,
> +  0x776cfbbe, 0x9c542bb2, 0x64193aa8, 0x16e8a655, 0x4e3d2f92, 0xe05d7b72,
> +  0x89854ebc, 0x8c318814, 0xb81e76e0, 0x3f2625f5, 0x61b44852, 0x5209d7ad,
> +  0x842fe317, 0xd3cfcca1, 0x8d287cc7, 0x80f0c9a8, 0x4215f4e5, 0x563993d6,
> +  0x5d627433, 0xc4449e35, 0x5b4fe009, 0x3ef92286, 0xacbc8927, 0x549ab870,
> +  0x9ac5b959, 0xed8f1c91, 0x7ecf02cd, 0x989c0e8b, 0xa31d6918, 0x1dc2bcc1,
> +  0x99d3f3cc, 0x6857acc8, 0x45d7324a, 0xaebdf2e6, 0x7af2f2ae, 0x09716f73,
> +  0x7816e694, 0xc65493c0, 0x9f7e87bc, 0xaa96cd40, 0xbfb5bfc6, 0x01a2cce7,
> +  0x5f1d8c46, 0x45303efb, 0xb24607c3, 0xef2009a7, 0xba873753, 0xbefb14bc,
> +  0x74e53cd3, 0x70124708, 0x6eb4bdbd, 0xf3ba5e43, 0x4c94085f, 0x0c03e7e0,
> +  0x9a084931, 0x62735424, 0xaeee77c5, 0xdb34f90f, 0x6860cbdd, 0xaf77cf9f,
> +  0x95b28158, 0x23bd70d7, 0x9fbc3d88, 0x742e659e, 0x53bcfb48, 0xb8a63f6c,
> +  0x4dcf3373, 0x2b168627, 0x4fe20745, 0xd0af5e94, 0x22514e6a, 0xb8ef25c2,
> +  0x89ec781a, 0x13d9002b, 0x6d724500, 0x7fdbf63f, 0xb0e9ced5, 0xf919e0f3,
> +  0x00fef203, 0x8905d47a, 0x434e7517, 0x4aef8e2c, 0x689f51e8, 0xe513b7c3,
> +  0x72bbc5d2, 0x3a222f74, 0x05c3a0f9, 0xd5489d82, 0xb41fbe83, 0xec5d305f,
> +  0x5ea02b0b, 0xb176065b, 0xa8eb404e, 0x80349117, 0x210fd49e, 0x43898d0e,
> +  0x6c151b9c, 0x8742df18, 0x7b64de73, 0x1dbf52b2, 0x55c9cb19, 0xeb841f10,
> +  0x10b8ae76, 0x0764ecb6, 0xb7479018, 0x2672cb3f, 0x7ac9ac90, 0x4be5332c,
> +  0x8f1a0615, 0x4efb7a77, 0x16551a85, 0xdb2c3d66, 0x49179c07, 0x5dc4657e,
> +  0x5e76907e, 0xd7486a9c, 0x445204a4, 0x65cdc426, 0x33f86ded, 0xcba95dda,
> +  0x83351f16, 0xfedefad9, 0x639b620f, 0x86896a64, 0xba4099ba, 0x965f4a21,
> +  0x1247154f, 0x25604c42, 0x5862d692, 0xb1e9149e, 0x612516a5, 0x02c49bf8,
> +  0x631212bf, 0x9f69f54e, 0x168b63b0, 0x310a25ba, 0xa42a59cd, 0x084f0af9,
> +  0x44a06cec, 0x5c0cda40, 0xb932d721, 0x7c42bb0d, 0x213cd3f0, 0xedc7f5a4,
> +  0x7fb85859, 0x6b3da5ea, 0x61cd591e, 0xe8e9aa08, 0x4361fc34, 0x53d40d2a,
> +  0x0511ad1b, 0xf996b44c, 0xb5ead756, 0xc022138d, 0x6172adf1, 0xa4a0a3b4,
> +  0x8c2977b8, 0xa8e482ed, 0x04fcdd6b, 0x3f7b85d4, 0x4fca1e46, 0xa392ddca,
> +  0x569fc791, 0x346a706c, 0x543bf3eb, 0x895b3cde, 0x2146bb80, 0x26b3c168,
> +  0x929998db, 0x1ea472c9, 0x7207b36b, 0x6a8f10d4 
> +};
>
Paul A. Clarke Oct. 26, 2018, 8:19 p.m. UTC | #3
On 10/26/2018 02:02 PM, Bill Schmidt wrote:
> On 10/25/18 2:08 PM, Paul Clarke wrote:

>> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h
>> new file mode 10644
>> --- /dev/null	(revision 0)
>> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h	(working copy)
>> @@ -0,0 +1,43 @@
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +
>> +#include "m128-check.h"
>> +
>> +/* define DEBUG replace abort with printf on error.  */
> 
> One nit -- this comment appears to be incorrect, as the only place DEBUG is used,
> you don't have abort() anywhere.
> 
> (I have a patch under review that questions why we would replace abort() rather
> than supplement it with printf, anyway...)

You are correct.  That comment was copied without consideration, is incorrect, and should just be deleted.  It looks like the abort() issue you are fixing in your patch is not present in the new(-ish) test cases in this patch.

I'll commit a new (trivial/obvious) change.

PC
diff mbox series

Patch

Index: gcc/testsuite/gcc.target/powerpc/ssse3-check.h
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h
new file mode 10644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h	(working copy)
@@ -0,0 +1,43 @@ 
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "m128-check.h"
+
+/* Define DEBUG to print PASSED or SKIPPED from main.  */
+//#define DEBUG 1
+
+#define TEST ssse3_test
+
+static void ssse3_test (void);
+
+static void
+__attribute__ ((noinline))
+do_test (void)
+{
+  ssse3_test ();
+}
+
+int
+main ()
+{
+#ifdef __BUILTIN_CPU_SUPPORTS__
+  /* Most SSE intrinsic operations can be implemented via VMX
+     instructions, but some operations may be faster / simpler
+     using the POWER8 VSX instructions.  This is especially true
+     when we are transferring / converting to / from __m64 types.
+     The direct register transfer instructions from POWER8 are
+     especially important.  So we test for arch_2_07.  */
+  if (__builtin_cpu_supports ("arch_2_07"))
+    {
+      do_test ();
+#ifdef DEBUG
+      printf ("PASSED\n");
+#endif
+    }
+#ifdef DEBUG
+  else
+    printf ("SKIPPED\n");
+#endif
+#endif /* __BUILTIN_CPU_SUPPORTS__ */
+  return 0;
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c
new file mode 10644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c	(working copy)
@@ -0,0 +1,80 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form */
+static void
+ssse3_test_pabsb (int *i1, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  *(__m64 *) r = _mm_abs_pi8 (t1);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form */
+static void
+ssse3_test_pabsb128 (int *i1, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  *(__m128i *) r = _mm_abs_epi8 (t1);
+}
+
+/* Reference PABSB: absolute value of each signed byte of i1.  */
+static void
+compute_correct_result (int *i1, int *r)
+{
+  signed char *b1 = (signed char *) i1;  /* Plain char may be unsigned.  */
+  char *bout = (char *) r;
+  int i;
+
+  for (i = 0; i < 16; i++)
+    if (b1[i] < 0)
+      bout[i] = -b1[i];
+    else
+      bout[i] = b1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));
+  int ck [4];
+  int fail = 0;
+
+  for (i = 0; i < 256; i += 4)
+    {
+      /* Manually compute the result */
+      compute_correct_result(&vals[i + 0], ck);
+
+#ifndef __AVX__
+      /* Run the 64-bit tests */
+      ssse3_test_pabsb (&vals[i + 0], &r[0]);
+      ssse3_test_pabsb (&vals[i + 2], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* Run the 128-bit tests */
+      ssse3_test_pabsb128 (&vals[i + 0], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c
new file mode 10644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c	(working copy)
@@ -0,0 +1,79 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form */
+static void
+ssse3_test_pabsd (int *i1, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  *(__m64 *) r = _mm_abs_pi32 (t1);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form */
+static void
+ssse3_test_pabsd128 (int *i1, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  *(__m128i *) r = _mm_abs_epi32 (t1);
+}
+
+/* Reference PABSD: absolute value of each int element of i1.  */
+static void
+compute_correct_result (int *i1, int *r)
+{
+  int i;
+
+  for (i = 0; i < 4; i++)
+    if (i1[i] < 0)
+      r[i] = (int) (0u - (unsigned int) i1[i]);  /* Wraps on INT_MIN.  */
+    else
+      r[i] = i1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));
+  int ck [4];
+  int fail = 0;
+
+  for (i = 0; i < 256; i += 4)
+    {
+      /* Manually compute the result */
+      compute_correct_result(&vals[i + 0], ck);
+
+#ifndef __AVX__
+      /* Run the 64-bit tests */
+      ssse3_test_pabsd (&vals[i + 0], &r[0]);
+      ssse3_test_pabsd (&vals[i + 2], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* Run the 128-bit tests */
+      ssse3_test_pabsd128 (&vals[i + 0], r);
+      fail += chk_128(ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c
new file mode 10644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c	(working copy)
@@ -0,0 +1,81 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form */
+static void
+ssse3_test_pabsw (int *i1, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  *(__m64 *) r = _mm_abs_pi16 (t1);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form */
+static void
+ssse3_test_pabsw128 (int *i1, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  *(__m128i *) r = _mm_abs_epi16 (t1);
+}
+
+/* Routine to manually compute the results */
+static void
+compute_correct_result (int *i1, int *r)
+{
+  short *s1 = (short *) i1;
+  short *sout = (short *) r;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    if (s1[i] < 0)
+      sout[i] = -s1[i];
+    else
+      sout[i] = s1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));
+  int ck [4];
+  int fail = 0;
+
+  for (i = 0; i < 256; i += 4)
+    {
+      /* Manually compute the result */
+      compute_correct_result (&vals[i + 0], ck);
+
+#ifndef __AVX__
+      /* Run the 64-bit tests */
+      ssse3_test_pabsw (&vals[i + 0], &r[0]);
+      ssse3_test_pabsw (&vals[i + 2], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* Run the 128-bit tests */
+      ssse3_test_pabsw128 (&vals[i + 0], r);
+      fail += chk_128 (ck, r);
+    }
+  
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c
new file mode 10644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c	(working copy)
@@ -0,0 +1,279 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+#include <string.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_alignr_pi8 (*I1, *I2, IMM), data as __m64.  */
+static void
+ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
+{
+  __m64 a = *(__m64 *) i1;
+  __m64 b = *(__m64 *) i2;
+  /* Every case hands the intrinsic its byte count as a literal.  */
+  switch (imm)
+    {
+    case 0:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 0);
+      break;
+    case 1:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 1);
+      break;
+    case 2:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 2);
+      break;
+    case 3:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 3);
+      break;
+    case 4:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 4);
+      break;
+    case 5:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 5);
+      break;
+    case 6:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 6);
+      break;
+    case 7:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 7);
+      break;
+    case 8:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 8);
+      break;
+    case 9:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 9);
+      break;
+    case 10:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 10);
+      break;
+    case 11:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 11);
+      break;
+    case 12:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 12);
+      break;
+    case 13:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 13);
+      break;
+    case 14:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 14);
+      break;
+    case 15:
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 15);
+      break;
+    default:	/* every IMM above 15 is exercised with a count of 16 */
+      *(__m64 *) r = _mm_alignr_pi8 (a, b, 16);
+      break;
+    }
+
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_alignr_epi8 (*I1, *I2, IMM) on __m128i.  */
+static void
+ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+
+  switch (imm)	/* each case passes its byte count as a literal */
+    {
+    case 0:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);
+      break;
+    case 1:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);
+      break;
+    case 2:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);
+      break;
+    case 3:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);
+      break;
+    case 4:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);
+      break;
+    case 5:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);
+      break;
+    case 6:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);
+      break;
+    case 7:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);
+      break;
+    case 8:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);
+      break;
+    case 9:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);
+      break;
+    case 10:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10);
+      break;
+    case 11:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11);
+      break;
+    case 12:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12);
+      break;
+    case 13:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13);
+      break;
+    case 14:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14);
+      break;
+    case 15:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15);
+      break;
+    case 16:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16);
+      break;
+    case 17:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17);
+      break;
+    case 18:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18);
+      break;
+    case 19:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19);
+      break;
+    case 20:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20);
+      break;
+    case 21:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21);
+      break;
+    case 22:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22);
+      break;
+    case 23:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23);
+      break;
+    case 24:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24);
+      break;
+    case 25:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25);
+      break;
+    case 26:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26);
+      break;
+    case 27:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27);
+      break;
+    case 28:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28);
+      break;
+    case 29:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29);
+      break;
+    case 30:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30);
+      break;
+    case 31:
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31);
+      break;
+    default:	/* every IMM above 31 is exercised with a count of 32 */
+      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
+      break;
+    }
+}
+
+/* Scalar reference for the 128-bit PALIGNR: bytes IMM..IMM+15 of I1:I2.  */
+static void
+compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
+{
+  char window [32];
+  char *dst = (char *) r;
+  unsigned int k;
+
+  /* I2 supplies the low 16 bytes of the window, I1 the high 16.  */
+  memcpy (window, i2, 16);
+  memcpy (window + 16, i1, 16);
+
+  /* Output byte K is window byte IMM + K; positions past the end of
+     the window read as zero.  */
+  for (k = 0; k < 16; k++)
+    dst[k] = (imm < 32 && imm + k < 32) ? window[imm + k] : 0;
+}
+
+#ifndef __AVX__
+/* Scalar reference for the 64-bit PALIGNR, run on each operand half.  */
+static void
+compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
+{
+  char window [16];
+  char *dst = (char *) r;
+  int half;
+  unsigned int k;
+
+  /* The 16-byte operands are handled as two independent 64-bit
+     problems: ints 0-1 form the first half, ints 2-3 the second.  */
+  for (half = 0; half < 2; half++)
+    {
+      /* I2 supplies the low 8 bytes of the window, I1 the high 8.  */
+      memcpy (&window[0], &i2[2 * half], 8);
+      memcpy (&window[8], &i1[2 * half], 8);
+
+      /* Output byte K is window byte IMM + K; positions past the end
+         of the window read as zero.  */
+      for (k = 0; k < 8; k++)
+        if (imm < 16 && imm + k < 16)
+          dst[8 * half + k] = window[imm + k];
+        else
+          dst[8 * half + k] = 0;
+    }
+
+}
+#endif
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from the scalar reference */
+  unsigned int imm;
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    for (imm = 0; imm < 100; imm++)	/* also covers the default: cases */
+      {
+#ifndef __AVX__
+	/* Expected result when each operand half is shifted on its own.  */
+	compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
+
+	/* 64-bit form: first halves of both operands, then second halves.  */
+	ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
+	ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
+	fail += chk_128 (ck, r);
+#endif
+
+	/* Recompute the results for 128-bits */
+	compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
+
+	/* 128-bit form: a single call rewrites all of r.  */
+	ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
+	fail += chk_128 (ck, r);
+      }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c	(working copy)
@@ -0,0 +1,81 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_hadd_pi32 (*I1, *I2), all accessed as __m64.  */
+static void
+ssse3_test_phaddd (int *i1, int *i2, int *r)
+{
+  __m64 a = *(__m64 *) i1;
+  __m64 b = *(__m64 *) i2;
+  *(__m64 *) r = _mm_hadd_pi32 (a, b);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_hadd_epi32 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_phaddd128 (int *i1, int *i2, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+  *(__m128i *) r = _mm_hadd_epi32 (t1, t2);
+}
+
+/* Reference PHADDD: pairwise sums, done unsigned so overflow wraps (no UB).  */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    r[i] = (int) ((unsigned) i1[2 * i] + (unsigned) i1[2 * i + 1]);
+  for (i = 0; i < 2; i++)
+    r[i + 2] = (int) ((unsigned) i2[2 * i] + (unsigned) i2[2 * i + 1]);
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from compute_correct_result */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  /* Each pass consumes eight ints of vals: two 128-bit operands.  */
+  for (i = 0; i < 256; i += 8)
+    {
+      /* Expected result for operands vals[i..i+3] and vals[i+4..i+7].  */
+      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+
+#ifndef __AVX__
+      /* 64-bit form: each call takes both halves of ONE 128-bit operand.  */
+      ssse3_test_phaddd (&vals[i + 0], &vals[i + 2], &r[0]);
+      ssse3_test_phaddd (&vals[i + 4], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_phaddd128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c	(working copy)
@@ -0,0 +1,95 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_hadds_pi16 (*I1, *I2), all accessed as __m64.  */
+static void
+ssse3_test_phaddsw (int *i1, int *i2, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  __m64 t2 = *(__m64 *) i2;
+  *(__m64 *) r = _mm_hadds_pi16 (t1, t2);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_hadds_epi16 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_phaddsw128 (int *i1, int *i2, int *r)
+{
+  /* Both inputs and the output are assumed to be 16-byte aligned.  */
+  __m128i a = *(__m128i *) i1;
+  __m128i b = *(__m128i *) i2;
+  *(__m128i *) r = _mm_hadds_epi16 (a, b);
+}
+
+/* Clamp X to the range of a signed 16-bit word, [-0x8000, 0x7fff].  */
+static short
+signed_saturate_to_word (int x)
+{
+  if (x > 0x7fff)
+    return 0x7fff;
+  if (x < -0x8000)
+    return -0x8000;
+
+  return (short) x;
+}
+
+/* Scalar reference for PHADDSW: saturated pairwise sums, I1 then I2.  */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  short *a = (short *) i1;
+  short *b = (short *) i2;
+  short *dst = (short *) r;
+  int k;
+
+  for (k = 0; k < 4; k++)
+    dst[k] = signed_saturate_to_word (a[2 * k] + a[2 * k + 1]);
+  for (k = 0; k < 4; k++)
+    dst[k + 4] = signed_saturate_to_word (b[2 * k] + b[2 * k + 1]);
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from compute_correct_result */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    {
+      /* Expected result for operands vals[i..i+3] and vals[i+4..i+7].  */
+      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+
+#ifndef __AVX__
+      /* 64-bit form: each call takes both halves of ONE 128-bit operand.  */
+      ssse3_test_phaddsw (&vals[i + 0], &vals[i + 2], &r[0]);
+      ssse3_test_phaddsw (&vals[i + 4], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_phaddsw128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c	(working copy)
@@ -0,0 +1,84 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_hadd_pi16 (*I1, *I2), all accessed as __m64.  */
+static void
+ssse3_test_phaddw (int *i1, int *i2, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  __m64 t2 = *(__m64 *) i2;
+  *(__m64 *) r = _mm_hadd_pi16 (t1, t2);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_hadd_epi16 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_phaddw128 (int *i1, int *i2, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+  *(__m128i *) r = _mm_hadd_epi16 (t1, t2);
+}
+
+/* Scalar reference for PHADDW: pairwise sums of 16-bit lanes, I1 then I2.  */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  short *a = (short *) i1;
+  short *b = (short *) i2;
+  short *dst = (short *) r;
+  int k;
+
+  /* Sums are formed in int and truncated by the store to short.  */
+  for (k = 0; k < 4; k++)
+    dst[k] = a[2 * k] + a[2 * k + 1];
+  for (k = 0; k < 4; k++)
+    dst[k + 4] = b[2 * k] + b[2 * k + 1];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from compute_correct_result */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    {
+      /* Expected result for operands vals[i..i+3] and vals[i+4..i+7].  */
+      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+
+#ifndef __AVX__
+      /* 64-bit form: each call takes both halves of ONE 128-bit operand.  */
+      ssse3_test_phaddw (&vals[i + 0], &vals[i + 2], &r[0]);
+      ssse3_test_phaddw (&vals[i + 4], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_phaddw128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c	(working copy)
@@ -0,0 +1,80 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_hsub_pi32 (*I1, *I2), all accessed as __m64.  */
+static void
+ssse3_test_phsubd (int *i1, int *i2, int *r)
+{
+  __m64 a = *(__m64 *) i1;
+  __m64 b = *(__m64 *) i2;
+  *(__m64 *) r = _mm_hsub_pi32 (a, b);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_hsub_epi32 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_phsubd128 (int *i1, int *i2, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+  *(__m128i *) r = _mm_hsub_epi32 (t1, t2);
+}
+
+/* Reference PHSUBD: pairwise differences, unsigned so overflow wraps (no UB).  */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  int i;
+
+  for (i = 0; i < 2; i++)
+    r[i] = (int) ((unsigned) i1[2 * i] - (unsigned) i1[2 * i + 1]);
+  for (i = 0; i < 2; i++)
+    r[i + 2] = (int) ((unsigned) i2[2 * i] - (unsigned) i2[2 * i + 1]);
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from compute_correct_result */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    {
+      /* Expected result for operands vals[i..i+3] and vals[i+4..i+7].  */
+      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+
+#ifndef __AVX__
+      /* 64-bit form: each call takes both halves of ONE 128-bit operand.  */
+      ssse3_test_phsubd (&vals[i + 0], &vals[i + 2], &r[0]);
+      ssse3_test_phsubd (&vals[i + 4], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_phsubd128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c	(working copy)
@@ -0,0 +1,98 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_hsubs_pi16 (*I1, *I2), all accessed as __m64.  */
+static void
+ssse3_test_phsubsw (int *i1, int *i2, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  __m64 t2 = *(__m64 *) i2;
+
+  *(__m64 *) r = _mm_hsubs_pi16 (t1, t2);
+
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_hsubs_epi16 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_phsubsw128 (int *i1, int *i2, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+  *(__m128i *) r = _mm_hsubs_epi16 (t1, t2);
+}
+
+/* Clamp X to the range of a signed 16-bit word, [-0x8000, 0x7fff].  */
+static short
+signed_saturate_to_word (int x)
+{
+  if (x > 0x7fff)
+    return 0x7fff;
+  if (x < -0x8000)
+    return -0x8000;
+
+  return (short) x;
+}
+
+/* Scalar reference for PHSUBSW: saturated pairwise differences, I1 then I2.  */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  short *a = (short *) i1;
+  short *b = (short *) i2;
+  short *dst = (short *) r;
+  int k;
+
+  /* Even element minus the odd element that follows it.  */
+  for (k = 0; k < 4; k++)
+    dst[k] = signed_saturate_to_word (a[2 * k] - a[2 * k + 1]);
+  for (k = 0; k < 4; k++)
+    dst[k + 4] = signed_saturate_to_word (b[2 * k] - b[2 * k + 1]);
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from compute_correct_result */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    {
+      /* Expected result for operands vals[i..i+3] and vals[i+4..i+7].  */
+      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+
+#ifndef __AVX__
+      /* 64-bit form: each call takes both halves of ONE 128-bit operand.  */
+      ssse3_test_phsubsw (&vals[i + 0], &vals[i + 2], &r[0]);
+      ssse3_test_phsubsw (&vals[i + 4], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_phsubsw128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c	(working copy)
@@ -0,0 +1,83 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_hsub_pi16 (*I1, *I2), all accessed as __m64.  */
+static void
+ssse3_test_phsubw (int *i1, int *i2, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  __m64 t2 = *(__m64 *) i2;
+  *(__m64 *) r = _mm_hsub_pi16 (t1, t2);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_hsub_epi16 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_phsubw128 (int *i1, int *i2, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+
+  *(__m128i *) r = _mm_hsub_epi16 (t1, t2);
+}
+
+/* Scalar reference for PHSUBW: pairwise differences of 16-bit lanes.  */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  short *a = (short *) i1;
+  short *b = (short *) i2;
+  short *dst = (short *) r;
+  int k;
+
+  for (k = 0; k < 4; k++)	/* I1 pairs fill lanes 0-3 */
+    dst[k] = a[2 * k] - a[2 * k + 1];
+  for (k = 0; k < 4; k++)	/* I2 pairs fill lanes 4-7 */
+    dst[k + 4] = b[2 * k] - b[2 * k + 1];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from compute_correct_result */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    {
+      /* Expected result for operands vals[i..i+3] and vals[i+4..i+7].  */
+      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+
+#ifndef __AVX__
+      /* 64-bit form: each call takes both halves of ONE 128-bit operand.  */
+      ssse3_test_phsubw (&vals[i + 0], &vals[i + 2], &r[0]);
+      ssse3_test_phsubw (&vals[i + 4], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_phsubw128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c	(working copy)
@@ -0,0 +1,98 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_maddubs_pi16 (*I1, *I2), all as __m64.  */
+static void
+ssse3_test_pmaddubsw (int *i1, int *i2, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  __m64 t2 = *(__m64 *) i2;
+  *(__m64 *) r = _mm_maddubs_pi16 (t1, t2);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_maddubs_epi16 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_pmaddubsw128 (int *i1, int *i2, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+  *(__m128i *) r = _mm_maddubs_epi16 (t1, t2);
+}
+
+/* Clamp X to the range of a signed 16-bit word, [-0x8000, 0x7fff].  */
+static short
+signed_saturate_to_word (int x)
+{
+  if (x > 0x7fff)
+    return 0x7fff;
+  if (x < -0x8000)
+    return -0x8000;
+
+  return (short) x;
+}
+
+/* Scalar reference for PMADDUBSW: I1 bytes are unsigned, I2 bytes signed.  */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  unsigned char *ub1 = (unsigned char *) i1;
+  /* Plain char may be unsigned (it is on PowerPC Linux); spell out signed.  */
+  signed char *sb2 = (signed char *) i2;
+  short *sout = (short *) r;
+  int t0, i;
+
+  for (i = 0; i < 8; i++)
+    {
+      t0 = ((int) ub1[2 * i] * (int) sb2[2 * i] +
+	    (int) ub1[2 * i + 1] * (int) sb2[2 * i + 1]);
+      sout[i] = signed_saturate_to_word (t0);
+    }
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from compute_correct_result */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    {
+      /* Expected result for operands vals[i..i+3] and vals[i+4..i+7].  */
+      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+
+#ifndef __AVX__
+      /* 64-bit form: first halves of both operands, then second halves.  */
+      ssse3_test_pmaddubsw (&vals[i + 0], &vals[i + 4], &r[0]);
+      ssse3_test_pmaddubsw (&vals[i + 2], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_pmaddubsw128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c	(working copy)
@@ -0,0 +1,85 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_mulhrs_pi16 (*I1, *I2), all as __m64.  */
+static void
+ssse3_test_pmulhrsw (int *i1, int *i2, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  __m64 t2 = *(__m64 *) i2;
+  *(__m64 *) r = _mm_mulhrs_pi16 (t1, t2);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_mulhrs_epi16 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_pmulhrsw128 (int *i1, int *i2, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+  *(__m128i *) r = _mm_mulhrs_epi16 (t1, t2);
+}
+
+/* Scalar reference for PMULHRSW on eight 16-bit lanes.  */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  short *a = (short *) i1;
+  short *b = (short *) i2;
+  short *dst = (short *) r;
+  int k;
+
+  for (k = 0; k < 8; k++)
+    {
+      /* Shift the int product right by 14, add 1 to round, then halve.  */
+      int t = (((int) a[k] * (int) b[k]) >> 14) + 1;
+      dst[k] = (short) (t >> 1);
+    }
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from compute_correct_result */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    {
+      /* Expected result for operands vals[i..i+3] and vals[i+4..i+7].  */
+      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+
+#ifndef __AVX__
+      /* 64-bit form: first halves of both operands, then second halves.  */
+      ssse3_test_pmulhrsw (&vals[i + 0], &vals[i + 4], &r[0]);
+      ssse3_test_pmulhrsw (&vals[i + 2], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_pmulhrsw128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c	(working copy)
@@ -0,0 +1,114 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_shuffle_pi8 (*I1, *I2), all as __m64.  */
+static void
+ssse3_test_pshufb (int *i1, int *i2, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  __m64 t2 = *(__m64 *) i2;
+  *(__m64 *)r = _mm_shuffle_pi8 (t1, t2);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_shuffle_epi8 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_pshufb128 (int *i1, int *i2, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+  *(__m128i *)r = _mm_shuffle_epi8 (t1, t2);
+}
+
+#ifndef __AVX__
+/* Scalar reference for the 64-bit PSHUFB, run on each 8-byte half.  */
+static void
+compute_correct_result_64 (int *i1, int *i2, int *r)
+{
+  const char *data = (const char *) i1;
+  const char *ctrl = (const char *) i2;
+  char *dst = (char *) r;
+  int k;
+
+  for (k = 0; k < 16; k++)
+    {
+      /* Bytes 0-7 index the first 8 data bytes, bytes 8-15 the last 8.  */
+      int base = (k < 8) ? 0 : 8;
+
+      /* A set top bit in the control byte forces a zero result.  */
+      if (ctrl[k] & 0x80)
+        dst[k] = 0;
+      else
+        dst[k] = data[base + (ctrl[k] & 0x7)];
+    }
+}
+#endif
+
+/* Scalar reference for the 128-bit PSHUFB.  */
+static void
+compute_correct_result_128 (int *i1, int *i2, int *r)
+{
+  const char *data = (const char *) i1;
+  const char *ctrl = (const char *) i2;
+  char *dst = (char *) r;
+  int k;
+
+  /* A set top bit in a control byte yields zero; otherwise its low four
+     bits select one of the 16 data bytes.  */
+  for (k = 0; k < 16; k++)
+    {
+      char sel = ctrl[k];
+
+      dst[k] = (sel & 0x80) ? 0 : data[sel & 0xf];
+    }
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from the scalar reference */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    {
+#ifndef __AVX__
+      /* Expected result when each 8-byte half is shuffled on its own.  */
+      compute_correct_result_64 (&vals[i + 0], &vals[i + 4], ck);
+
+      /* 64-bit form: first halves of both operands, then second halves.  */
+      ssse3_test_pshufb (&vals[i + 0], &vals[i + 4], &r[0]);
+      ssse3_test_pshufb (&vals[i + 2], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* Recompute the result for 128-bits */
+      compute_correct_result_128 (&vals[i + 0], &vals[i + 4], ck);
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_pshufb128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c	(working copy)
@@ -0,0 +1,85 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_sign_pi8 (*I1, *I2), all accessed as __m64.  */
+static void
+ssse3_test_psignb (int *i1, int *i2, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  __m64 t2 = *(__m64 *) i2;
+  *(__m64 *) r = _mm_sign_pi8 (t1, t2);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_sign_epi8 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_psignb128 (int *i1, int *i2, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+  *(__m128i *) r = _mm_sign_epi8 (t1, t2);
+}
+
+/* Reference PSIGNB: the sign of each I2 byte negates, zeroes or keeps I1's.  */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  /* Plain char may be unsigned (it is on PowerPC Linux), which would make
+     the "< 0" test below always false; use signed char explicitly.  */
+  signed char *b1 = (signed char *) i1;
+  signed char *b2 = (signed char *) i2;
+  signed char *bout = (signed char *) r;
+  int i;
+
+  for (i = 0; i < 16; i++)
+    if (b2[i] < 0)
+      bout[i] = -b1[i];
+    else
+      bout[i] = (b2[i] == 0) ? 0 : b1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from compute_correct_result */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    {
+      /* Expected result for operands vals[i..i+3] and vals[i+4..i+7].  */
+      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+
+#ifndef __AVX__
+      /* 64-bit form: first halves of both operands, then second halves.  */
+      ssse3_test_psignb (&vals[i + 0], &vals[i + 4], &r[0]);
+      ssse3_test_psignb (&vals[i + 2], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_psignb128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c
new file mode 10644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c	(working copy)
@@ -0,0 +1,82 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Test the 64-bit form: *R = _mm_sign_pi32 (*I1, *I2), all accessed as __m64.  */
+static void
+ssse3_test_psignd (int *i1, int *i2, int *r)
+{
+  __m64 t1 = *(__m64 *) i1;
+  __m64 t2 = *(__m64 *) i2;
+  *(__m64 *) r = _mm_sign_pi32 (t1, t2);
+  _mm_empty ();
+}
+#endif
+
+/* Test the 128-bit form: *R = _mm_sign_epi32 (*I1, *I2), all as __m128i.  */
+static void
+ssse3_test_psignd128 (int *i1, int *i2, int *r)
+{
+  /* Assumes incoming pointers are 16-byte aligned */
+  __m128i t1 = *(__m128i *) i1;
+  __m128i t2 = *(__m128i *) i2;
+  *(__m128i *)r = _mm_sign_epi32 (t1, t2);
+}
+
+/* Reference PSIGND: the sign of each I2 element negates, zeroes or keeps I1's.  */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  int i;
+
+  /* Negation is done in unsigned arithmetic so that INT_MIN wraps to
+     itself rather than overflowing a signed int.  */
+  for (i = 0; i < 4; i++)
+    if (i2[i] < 0)
+      r[i] = (int) (0u - (unsigned int) i1[i]);
+    else
+      r[i] = (i2[i] == 0) ? 0 : i1[i];
+}
+
+static void
+TEST (void)
+{
+  int i;
+  int r [4] __attribute__ ((aligned(16)));	/* result from the intrinsic */
+  int ck [4];	/* expected result from compute_correct_result */
+  int fail = 0;	/* sum of chk_128 returns; nonzero aborts below */
+
+  for (i = 0; i < 256; i += 8)
+    {
+      /* Expected result for operands vals[i..i+3] and vals[i+4..i+7].  */
+      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
+
+#ifndef __AVX__
+      /* 64-bit form: first halves of both operands, then second halves.  */
+      ssse3_test_psignd (&vals[i + 0], &vals[i + 4], &r[0]);
+      ssse3_test_psignd (&vals[i + 2], &vals[i + 6], &r[2]);
+      fail += chk_128 (ck, r);
+#endif
+
+      /* 128-bit form: a single call rewrites all of r.  */
+      ssse3_test_psignd128 (&vals[i + 0], &vals[i + 4], r);
+      fail += chk_128 (ck, r);
+    }
+
+  if (fail != 0)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c	(working copy)
@@ -0,0 +1,85 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+#include "ssse3-vals.h"
+
+#include <tmmintrin.h>
+
+#ifndef __AVX__
+/* Run _mm_sign_pi16 on the 64-bit operands at I1 and I2, storing to R */
+static void
+ssse3_test_psignw (int *i1, int *i2, int *r)
+{
+  const __m64 *src = (const __m64 *) i1, *sign = (const __m64 *) i2;
+
+  *(__m64 *) r = _mm_sign_pi16 (*src, *sign);
+  _mm_empty ();
+}
+#endif
+
+/* Run _mm_sign_epi16 on the 128-bit operands at I1 and I2, storing to R */
+static void
+ssse3_test_psignw128 (int *i1, int *i2, int *r)
+{
+  /* Callers must pass 16-byte aligned pointers */
+  const __m128i *src = (const __m128i *) i1, *sign = (const __m128i *) i2;
+
+  *(__m128i *) r = _mm_sign_epi16 (*src, *sign);
+}
+
+/* Expected psignw, computed on the 8 shorts overlaying I1, I2 and R */
+static void
+compute_correct_result (int *i1, int *i2, int *r)
+{
+  /* may_alias: reading int storage through a plain short * is UB */
+  typedef short __attribute__ ((__may_alias__)) short_a;
+  short_a *s1 = (short_a *) i1, *s2 = (short_a *) i2, *sout = (short_a *) r;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    if (s2[i] < 0)
+      sout[i] = -s1[i];
+    else if (s2[i] == 0)
+      sout[i] = 0;
+    else
+      sout[i] = s1[i];
+}
+
+static void
+TEST (void)
+{
+  int expected [4];
+  int actual [4] __attribute__ ((aligned(16)));
+  int base, mismatches = 0;
+
+  for (base = 0; base < 256; base += 8)
+    {
+      /* vals[base..base+3] is operand 1, vals[base+4..base+7] operand 2 */
+      int *op1 = &vals[base], *op2 = &vals[base + 4];
+      compute_correct_result (op1, op2, expected);
+
+#ifndef __AVX__
+      /* 64-bit form: one call per 8-byte half of the operands */
+      ssse3_test_psignw (op1, op2, &actual[0]);
+      ssse3_test_psignw (op1 + 2, op2 + 2, &actual[2]);
+      mismatches += chk_128 (expected, actual);
+#endif
+
+      /* 128-bit form: the whole operands in one call */
+      ssse3_test_psignw128 (op1, op2, actual);
+      mismatches += chk_128 (expected, actual);
+    }
+
+  if (mismatches)
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/ssse3-vals.h
===================================================================
diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h
new file mode 100644
--- /dev/null	(revision 0)
+++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h	(working copy)
@@ -0,0 +1,60 @@ 
+/* Return how many of the four ints at V1 differ from those at V2 */
+static int
+chk_128 (int *v1, int *v2)
+{
+  int mismatches = 0;
+  int *end = v1 + 4;
+
+  /* Each comparison yields 0 or 1, so the sum is the mismatch count */
+  while (v1 != end)
+    mismatches += (*v1++ != *v2++);
+
+  return mismatches;
+}
+
+static int vals [256] __attribute__ ((aligned(16))) =
+{ /* Test operands; aligned so 16-byte groups can be loaded as vectors */
+  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x5be800ee, 0x4f2d7b15,
+  0x409d9291, 0xdd95f27f, 0x423986e3, 0x21a4d2cd, 0xa7056d84, 0x4f4e5a3b,
+  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+  0x73ef0244, 0xcd836329, 0x847f634f, 0xa7e3abcf, 0xb4c14764, 0x1ef42c06,
+  0x504f29ac, 0x4ae7ca73, 0xaddde3c9, 0xf63ded2e, 0xa5d3553d, 0xa52ae05f,
+  0x6fd3c83a, 0x7dc2b300, 0x76b05de7, 0xea8ebae5, 0x549568dd, 0x172f0358,
+  0x917eadf0, 0x796fb0a7, 0xb39381af, 0xd0591d61, 0x731d2f17, 0xbc4b6f5d,
+  0x8ec664c2, 0x3c199c19, 0x9c81db12, 0x6d85913b, 0x486107a9, 0xab6f4b26,
+  0x5630d37c, 0x20836e85, 0x40d4e746, 0xdfbaba36, 0xbeacaa69, 0xb3c84083,
+  0x8a688eb4, 0x08cde481, 0x66e7a190, 0x74ee1639, 0xb3942a19, 0xe0c40471,
+  0x9b789489, 0x9751207a, 0x543a1524, 0x41da7ad6, 0x614bb563, 0xf86f57b1,
+  0x69e62199, 0x2150cb12, 0x9ed74062, 0x429471f4, 0xad28502b, 0xf2e2d4d5,
+  0x45b6ce09, 0xaaa5e649, 0xb46da484, 0x0a637515, 0xae7a3212, 0x5afc784c,
+  0x776cfbbe, 0x9c542bb2, 0x64193aa8, 0x16e8a655, 0x4e3d2f92, 0xe05d7b72,
+  0x89854ebc, 0x8c318814, 0xb81e76e0, 0x3f2625f5, 0x61b44852, 0x5209d7ad,
+  0x842fe317, 0xd3cfcca1, 0x8d287cc7, 0x80f0c9a8, 0x4215f4e5, 0x563993d6,
+  0x5d627433, 0xc4449e35, 0x5b4fe009, 0x3ef92286, 0xacbc8927, 0x549ab870,
+  0x9ac5b959, 0xed8f1c91, 0x7ecf02cd, 0x989c0e8b, 0xa31d6918, 0x1dc2bcc1,
+  0x99d3f3cc, 0x6857acc8, 0x45d7324a, 0xaebdf2e6, 0x7af2f2ae, 0x09716f73,
+  0x7816e694, 0xc65493c0, 0x9f7e87bc, 0xaa96cd40, 0xbfb5bfc6, 0x01a2cce7,
+  0x5f1d8c46, 0x45303efb, 0xb24607c3, 0xef2009a7, 0xba873753, 0xbefb14bc,
+  0x74e53cd3, 0x70124708, 0x6eb4bdbd, 0xf3ba5e43, 0x4c94085f, 0x0c03e7e0,
+  0x9a084931, 0x62735424, 0xaeee77c5, 0xdb34f90f, 0x6860cbdd, 0xaf77cf9f,
+  0x95b28158, 0x23bd70d7, 0x9fbc3d88, 0x742e659e, 0x53bcfb48, 0xb8a63f6c,
+  0x4dcf3373, 0x2b168627, 0x4fe20745, 0xd0af5e94, 0x22514e6a, 0xb8ef25c2,
+  0x89ec781a, 0x13d9002b, 0x6d724500, 0x7fdbf63f, 0xb0e9ced5, 0xf919e0f3,
+  0x00fef203, 0x8905d47a, 0x434e7517, 0x4aef8e2c, 0x689f51e8, 0xe513b7c3,
+  0x72bbc5d2, 0x3a222f74, 0x05c3a0f9, 0xd5489d82, 0xb41fbe83, 0xec5d305f,
+  0x5ea02b0b, 0xb176065b, 0xa8eb404e, 0x80349117, 0x210fd49e, 0x43898d0e,
+  0x6c151b9c, 0x8742df18, 0x7b64de73, 0x1dbf52b2, 0x55c9cb19, 0xeb841f10,
+  0x10b8ae76, 0x0764ecb6, 0xb7479018, 0x2672cb3f, 0x7ac9ac90, 0x4be5332c,
+  0x8f1a0615, 0x4efb7a77, 0x16551a85, 0xdb2c3d66, 0x49179c07, 0x5dc4657e,
+  0x5e76907e, 0xd7486a9c, 0x445204a4, 0x65cdc426, 0x33f86ded, 0xcba95dda,
+  0x83351f16, 0xfedefad9, 0x639b620f, 0x86896a64, 0xba4099ba, 0x965f4a21,
+  0x1247154f, 0x25604c42, 0x5862d692, 0xb1e9149e, 0x612516a5, 0x02c49bf8,
+  0x631212bf, 0x9f69f54e, 0x168b63b0, 0x310a25ba, 0xa42a59cd, 0x084f0af9,
+  0x44a06cec, 0x5c0cda40, 0xb932d721, 0x7c42bb0d, 0x213cd3f0, 0xedc7f5a4,
+  0x7fb85859, 0x6b3da5ea, 0x61cd591e, 0xe8e9aa08, 0x4361fc34, 0x53d40d2a,
+  0x0511ad1b, 0xf996b44c, 0xb5ead756, 0xc022138d, 0x6172adf1, 0xa4a0a3b4,
+  0x8c2977b8, 0xa8e482ed, 0x04fcdd6b, 0x3f7b85d4, 0x4fca1e46, 0xa392ddca,
+  0x569fc791, 0x346a706c, 0x543bf3eb, 0x895b3cde, 0x2146bb80, 0x26b3c168,
+  0x929998db, 0x1ea472c9, 0x7207b36b, 0x6a8f10d4 
+};