Patchwork [8/9] Testsuite: split tests for strided accesses

login
register
mail settings
Submitter Richard Sandiford
Date April 12, 2011, 2:19 p.m.
Message ID <g4r597eemb.fsf@linaro.org>
Download mbox | patch
Permalink /patch/90815/
State New
Headers show

Comments

Richard Sandiford - April 12, 2011, 2:19 p.m.
The next patch introduces separate vect_stridedN target selectors
for each tested stride factor N.  At the moment, some tests contain
several independent loops that have different stride factors.
It's easier to make the next change if we put these loops into
separate tests.

Tested on x86_64-linux-gnu and arm-linux-gnueabi.  OK to install?

Richard


gcc/testsuite/
	* gcc.dg/vect/slp-11.c: Split into...
	* gcc.dg/vect/slp-11a.c, gcc.dg/vect/slp-11b.c,
	gcc.dg/vect/slp-11c.c: ...these new tests.
	* gcc.dg/vect/slp-12a.c: Split 4-stride loop into...
	* gcc.dg/vect/slp-12c.c: ...this new test.
	* gcc.dg/vect/slp-19.c: Split into...
	* gcc.dg/vect/slp-19a.c, gcc.dg/vect/slp-19b.c,
	gcc.dg/vect/slp-19c.c: ...these new tests.
Richard Guenther - April 15, 2011, 12:41 p.m.
On Tue, Apr 12, 2011 at 4:19 PM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> The next patch introduces separate vect_stridedN target selectors
> for each tested stride factor N.  At the moment, some tests contain
> several independent loops that have different stride factors.
> It's easier to make the next change if we put these loops into
> separate tests.
>
> Tested on x86_64-linux-gnu and arm-linux-gnueabi.  OK to install?

Ok.

Thanks,
Richard.

> Richard
>
>
> gcc/testsuite/
>        * gcc.dg/vect/slp-11.c: Split into...
>        * gcc.dg/vect/slp-11a.c, gcc.dg/vect/slp-11b.c,
>        gcc.dg/vect/slp-11c.c: ...these tests.
>        * gcc.dg/vect/slp-12a.c: Split 4-stride loop into...
>        * gcc.dg/vect/slp-12c.c: ...this new test.
>        * gcc.dg/vect/slp-19.c: Split into...
>        * gcc.dg/vect/slp-19a.c, gcc.dg/vect/slp-19b.c,
>        gcc.dg/vect/slp-19c.c: ...these new tests.
>
> Index: gcc/testsuite/gcc.dg/vect/slp-11.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/slp-11.c  2011-04-12 15:18:24.000000000 +0100
> +++ /dev/null   2011-03-23 08:42:11.268792848 +0000
> @@ -1,113 +0,0 @@
> -/* { dg-require-effective-target vect_int } */
> -
> -#include <stdarg.h>
> -#include "tree-vect.h"
> -
> -#define N 8
> -
> -int
> -main1 ()
> -{
> -  int i;
> -  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
> -  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
> -  float out2[N*8];
> -
> -  /* Different operations - not SLPable.  */
> -  for (i = 0; i < N; i++)
> -    {
> -      a0 = in[i*8] + 5;
> -      a1 = in[i*8 + 1] * 6;
> -      a2 = in[i*8 + 2] + 7;
> -      a3 = in[i*8 + 3] + 8;
> -      a4 = in[i*8 + 4] + 9;
> -      a5 = in[i*8 + 5] + 10;
> -      a6 = in[i*8 + 6] + 11;
> -      a7 = in[i*8 + 7] + 12;
> -
> -      b0 = a0 * 3;
> -      b1 = a1 * 2;
> -      b2 = a2 * 12;
> -      b3 = a3 * 5;
> -      b4 = a4 * 8;
> -      b5 = a5 * 4;
> -      b6 = a6 * 3;
> -      b7 = a7 * 2;
> -
> -      out[i*8] = b0 - 2;
> -      out[i*8 + 1] = b1 - 3;
> -      out[i*8 + 2] = b2 - 2;
> -      out[i*8 + 3] = b3 - 1;
> -      out[i*8 + 4] = b4 - 8;
> -      out[i*8 + 5] = b5 - 7;
> -      out[i*8 + 6] = b6 - 3;
> -      out[i*8 + 7] = b7 - 7;
> -    }
> -
> -  /* check results:  */
> -  for (i = 0; i < N; i++)
> -    {
> -      if (out[i*8] !=  (in[i*8] + 5) * 3 - 2
> -         || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3
> -         || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2
> -         || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1
> -         || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8
> -         || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7
> -         || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3
> -         || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7)
> -       abort ();
> -    }
> -
> -  /* Requires permutation - not SLPable.  */
> -  for (i = 0; i < N*2; i++)
> -    {
> -      out[i*4] = (in[i*4] + 2) * 3;
> -      out[i*4 + 1] = (in[i*4 + 2] + 2) * 7;
> -      out[i*4 + 2] = (in[i*4 + 1] + 7) * 3;
> -      out[i*4 + 3] = (in[i*4 + 3] + 3) * 4;
> -    }
> -
> -  /* check results:  */
> -  for (i = 0; i < N*2; i++)
> -    {
> -      if (out[i*4] !=  (in[i*4] + 2) * 3
> -         || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7
> -         || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3
> -         || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4)
> -        abort ();
> -    }
> -
> -  /* Different operations - not SLPable.  */
> -  for (i = 0; i < N*4; i++)
> -    {
> -      out2[i*2] = ((float) in[i*2] * 2 + 6) ;
> -      out2[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7);
> -    }
> -
> -  /* check results:  */
> -  for (i = 0; i < N*4; i++)
> -    {
> -      if (out2[i*2] !=  ((float) in[i*2] * 2 + 6)
> -         || out2[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7))
> -        abort ();
> -    }
> -
> -
> -  return 0;
> -}
> -
> -int main (void)
> -{
> -  check_vect ();
> -
> -  main1 ();
> -
> -  return 0;
> -}
> -
> -/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect"  { target { { vect_uintfloat_cvt && vect_strided } &&  vect_int_mult } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  { target { { { ! vect_uintfloat_cvt } && vect_strided } &&  vect_int_mult } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  {target  { ! { vect_int_mult && vect_strided } } } } }  */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0  "vect"  } } */
> -/* { dg-final { cleanup-tree-dump "vect" } } */
> -
> Index: gcc/testsuite/gcc.dg/vect/slp-11a.c
> ===================================================================
> --- /dev/null   2011-03-23 08:42:11.268792848 +0000
> +++ gcc/testsuite/gcc.dg/vect/slp-11a.c 2011-04-12 15:18:25.000000000 +0100
> @@ -0,0 +1,75 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define N 8
> +
> +int
> +main1 ()
> +{
> +  int i;
> +  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
> +  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
> +
> +  /* Different operations - not SLPable.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      a0 = in[i*8] + 5;
> +      a1 = in[i*8 + 1] * 6;
> +      a2 = in[i*8 + 2] + 7;
> +      a3 = in[i*8 + 3] + 8;
> +      a4 = in[i*8 + 4] + 9;
> +      a5 = in[i*8 + 5] + 10;
> +      a6 = in[i*8 + 6] + 11;
> +      a7 = in[i*8 + 7] + 12;
> +
> +      b0 = a0 * 3;
> +      b1 = a1 * 2;
> +      b2 = a2 * 12;
> +      b3 = a3 * 5;
> +      b4 = a4 * 8;
> +      b5 = a5 * 4;
> +      b6 = a6 * 3;
> +      b7 = a7 * 2;
> +
> +      out[i*8] = b0 - 2;
> +      out[i*8 + 1] = b1 - 3;
> +      out[i*8 + 2] = b2 - 2;
> +      out[i*8 + 3] = b3 - 1;
> +      out[i*8 + 4] = b4 - 8;
> +      out[i*8 + 5] = b5 - 7;
> +      out[i*8 + 6] = b6 - 3;
> +      out[i*8 + 7] = b7 - 7;
> +    }
> +
> +  /* check results:  */
> +  for (i = 0; i < N; i++)
> +    {
> +      if (out[i*8] !=  (in[i*8] + 5) * 3 - 2
> +         || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3
> +         || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2
> +         || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1
> +         || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8
> +         || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7
> +         || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3
> +         || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7)
> +       abort ();
> +    }
> +
> +  return 0;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  main1 ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided && vect_int_mult } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> Index: gcc/testsuite/gcc.dg/vect/slp-11b.c
> ===================================================================
> --- /dev/null   2011-03-23 08:42:11.268792848 +0000
> +++ gcc/testsuite/gcc.dg/vect/slp-11b.c 2011-04-12 15:18:25.000000000 +0100
> @@ -0,0 +1,49 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define N 8
> +
> +int
> +main1 ()
> +{
> +  int i;
> +  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
> +  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
> +
> +  /* Requires permutation - not SLPable.  */
> +  for (i = 0; i < N*2; i++)
> +    {
> +      out[i*4] = (in[i*4] + 2) * 3;
> +      out[i*4 + 1] = (in[i*4 + 2] + 2) * 7;
> +      out[i*4 + 2] = (in[i*4 + 1] + 7) * 3;
> +      out[i*4 + 3] = (in[i*4 + 3] + 3) * 4;
> +    }
> +
> +  /* check results:  */
> +  for (i = 0; i < N*2; i++)
> +    {
> +      if (out[i*4] !=  (in[i*4] + 2) * 3
> +         || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7
> +         || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3
> +         || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4)
> +        abort ();
> +    }
> +
> +  return 0;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  main1 ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided && vect_int_mult } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> Index: gcc/testsuite/gcc.dg/vect/slp-11c.c
> ===================================================================
> --- /dev/null   2011-03-23 08:42:11.268792848 +0000
> +++ gcc/testsuite/gcc.dg/vect/slp-11c.c 2011-04-12 15:18:25.000000000 +0100
> @@ -0,0 +1,46 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define N 8
> +
> +int
> +main1 ()
> +{
> +  int i;
> +  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
> +  float out[N*8];
> +
> +  /* Different operations - not SLPable.  */
> +  for (i = 0; i < N*4; i++)
> +    {
> +      out[i*2] = ((float) in[i*2] * 2 + 6) ;
> +      out[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7);
> +    }
> +
> +  /* check results:  */
> +  for (i = 0; i < N*4; i++)
> +    {
> +      if (out[i*2] !=  ((float) in[i*2] * 2 + 6)
> +         || out[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7))
> +        abort ();
> +    }
> +
> +
> +  return 0;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  main1 ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { vect_uintfloat_cvt && vect_strided } && vect_int_mult } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { { vect_uintfloat_cvt && vect_strided } && vect_int_mult } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0  "vect"  } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> Index: gcc/testsuite/gcc.dg/vect/slp-12a.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/slp-12a.c 2011-04-12 15:18:24.000000000 +0100
> +++ gcc/testsuite/gcc.dg/vect/slp-12a.c 2011-04-12 15:18:25.000000000 +0100
> @@ -11,7 +11,7 @@ main1 ()
>   int i;
>   unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
>   unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
> -  unsigned int ia[N], ib[N*2];
> +  unsigned int ia[N];
>
>   for (i = 0; i < N; i++)
>     {
> @@ -61,27 +61,6 @@ main1 ()
>        abort ();
>     }
>
> -  for (i = 0; i < N*2; i++)
> -    {
> -      out[i*4] = (in[i*4] + 2) * 3;
> -      out[i*4 + 1] = (in[i*4 + 1] + 2) * 7;
> -      out[i*4 + 2] = (in[i*4 + 2] + 7) * 3;
> -      out[i*4 + 3] = (in[i*4 + 3] + 7) * 7;
> -
> -      ib[i] = 7;
> -    }
> -
> -  /* check results:  */
> -  for (i = 0; i < N*2; i++)
> -    {
> -      if (out[i*4] !=  (in[i*4] + 2) * 3
> -         || out[i*4 + 1] != (in[i*4 + 1] + 2) * 7
> -         || out[i*4 + 2] != (in[i*4 + 2] + 7) * 3
> -         || out[i*4 + 3] != (in[i*4 + 3] + 7) * 7
> -         || ib[i] != 7)
> -        abort ();
> -    }
> -
>   return 0;
>  }
>
> @@ -94,11 +73,8 @@ int main (void)
>   return 0;
>  }
>
> -/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  {target { vect_strided && vect_int_mult} } } } */
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  {target { {! {vect_strided}} && vect_int_mult } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  {target  { ! vect_int_mult } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { vect_strided && vect_int_mult } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { {! {vect_strided}} && vect_int_mult } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target  { ! vect_int_mult } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided && vect_int_mult } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided && vect_int_mult } } } } } */
>  /* { dg-final { cleanup-tree-dump "vect" } } */
> -
> Index: gcc/testsuite/gcc.dg/vect/slp-12c.c
> ===================================================================
> --- /dev/null   2011-03-23 08:42:11.268792848 +0000
> +++ gcc/testsuite/gcc.dg/vect/slp-12c.c 2011-04-12 15:18:25.000000000 +0100
> @@ -0,0 +1,53 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define N 8
> +
> +int
> +main1 ()
> +{
> +  int i;
> +  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
> +  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
> +  unsigned int ia[N*2];
> +
> +  for (i = 0; i < N*2; i++)
> +    {
> +      out[i*4] = (in[i*4] + 2) * 3;
> +      out[i*4 + 1] = (in[i*4 + 1] + 2) * 7;
> +      out[i*4 + 2] = (in[i*4 + 2] + 7) * 3;
> +      out[i*4 + 3] = (in[i*4 + 3] + 7) * 7;
> +
> +      ia[i] = 7;
> +    }
> +
> +  /* check results:  */
> +  for (i = 0; i < N*2; i++)
> +    {
> +      if (out[i*4] !=  (in[i*4] + 2) * 3
> +         || out[i*4 + 1] != (in[i*4 + 1] + 2) * 7
> +         || out[i*4 + 2] != (in[i*4 + 2] + 7) * 3
> +         || out[i*4 + 3] != (in[i*4 + 3] + 7) * 7
> +         || ia[i] != 7)
> +        abort ();
> +    }
> +
> +  return 0;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  main1 ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_int_mult } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  { target { ! vect_int_mult } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_int_mult } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> Index: gcc/testsuite/gcc.dg/vect/slp-19.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/slp-19.c  2011-04-12 15:18:24.000000000 +0100
> +++ /dev/null   2011-03-23 08:42:11.268792848 +0000
> @@ -1,154 +0,0 @@
> -/* { dg-require-effective-target vect_int } */
> -
> -#include <stdarg.h>
> -#include "tree-vect.h"
> -
> -#define N 16
> -
> -int
> -main1 ()
> -{
> -  unsigned int i;
> -  unsigned int out[N*8];
> -  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
> -  unsigned int ia[N*2], a0, a1, a2, a3;
> -
> -  for (i = 0; i < N; i++)
> -    {
> -      out[i*8] = in[i*8];
> -      out[i*8 + 1] = in[i*8 + 1];
> -      out[i*8 + 2] = in[i*8 + 2];
> -      out[i*8 + 3] = in[i*8 + 3];
> -      out[i*8 + 4] = in[i*8 + 4];
> -      out[i*8 + 5] = in[i*8 + 5];
> -      out[i*8 + 6] = in[i*8 + 6];
> -      out[i*8 + 7] = in[i*8 + 7];
> -
> -      ia[i] = in[i*8 + 2];
> -    }
> -
> -  /* check results:  */
> -  for (i = 0; i < N; i++)
> -    {
> -      if (out[i*8] !=  in[i*8]
> -         || out[i*8 + 1] != in[i*8 + 1]
> -         || out[i*8 + 2] != in[i*8 + 2]
> -         || out[i*8 + 3] != in[i*8 + 3]
> -         || out[i*8 + 4] != in[i*8 + 4]
> -         || out[i*8 + 5] != in[i*8 + 5]
> -         || out[i*8 + 6] != in[i*8 + 6]
> -         || out[i*8 + 7] != in[i*8 + 7]
> -         || ia[i] != in[i*8 + 2])
> -       abort ();
> -    }
> -
> -  for (i = 0; i < N*2; i++)
> -    {
> -      a0 = in[i*4] + 1;
> -      a1 = in[i*4 + 1] + 2;
> -      a2 = in[i*4 + 2] + 3;
> -      a3 = in[i*4 + 3] + 4;
> -
> -      out[i*4] = a0;
> -      out[i*4 + 1] = a1;
> -      out[i*4 + 2] = a2;
> -      out[i*4 + 3] = a3;
> -
> -      ia[i] = a2;
> -    }
> -
> -  /* check results:  */
> -  for (i = 0; i < N*2; i++)
> -    {
> -      if (out[i*4] !=  in[i*4] + 1
> -         || out[i*4 + 1] != in[i*4 + 1] + 2
> -         || out[i*4 + 2] != in[i*4 + 2] + 3
> -         || out[i*4 + 3] != in[i*4 + 3] + 4
> -         || ia[i] != in[i*4 + 2] + 3)
> -        abort ();
> -    }
> -
> -  /* The last stmt requires interleaving of not power of 2 size - not
> -     vectorizable.  */
> -  for (i = 0; i < N/2; i++)
> -    {
> -      out[i*12] = in[i*12];
> -      out[i*12 + 1] = in[i*12 + 1];
> -      out[i*12 + 2] = in[i*12 + 2];
> -      out[i*12 + 3] = in[i*12 + 3];
> -      out[i*12 + 4] = in[i*12 + 4];
> -      out[i*12 + 5] = in[i*12 + 5];
> -      out[i*12 + 6] = in[i*12 + 6];
> -      out[i*12 + 7] = in[i*12 + 7];
> -      out[i*12 + 8] = in[i*12 + 8];
> -      out[i*12 + 9] = in[i*12 + 9];
> -      out[i*12 + 10] = in[i*12 + 10];
> -      out[i*12 + 11] = in[i*12 + 11];
> -
> -      ia[i] = in[i*12 + 7];
> -    }
> -
> -  /* check results:  */
> -  for (i = 0; i < N/2; i++)
> -    {
> -      if (out[i*12] !=  in[i*12]
> -         || out[i*12 + 1] != in[i*12 + 1]
> -         || out[i*12 + 2] != in[i*12 + 2]
> -         || out[i*12 + 3] != in[i*12 + 3]
> -         || out[i*12 + 4] != in[i*12 + 4]
> -         || out[i*12 + 5] != in[i*12 + 5]
> -         || out[i*12 + 6] != in[i*12 + 6]
> -         || out[i*12 + 7] != in[i*12 + 7]
> -         || out[i*12 + 8] != in[i*12 + 8]
> -         || out[i*12 + 9] != in[i*12 + 9]
> -         || out[i*12 + 10] != in[i*12 + 10]
> -         || out[i*12 + 11] != in[i*12 + 11]
> -         || ia[i] != in[i*12 + 7])
> -        abort ();
> -    }
> -
> -  /* Hybrid SLP with unrolling by 2.  */
> -  for (i = 0; i < N; i++)
> -    {
> -      out[i*6] = in[i*6];
> -      out[i*6 + 1] = in[i*6 + 1];
> -      out[i*6 + 2] = in[i*6 + 2];
> -      out[i*6 + 3] = in[i*6 + 3];
> -      out[i*6 + 4] = in[i*6 + 4];
> -      out[i*6 + 5] = in[i*6 + 5];
> -
> -      ia[i] = i;
> -    }
> -
> -  /* check results:  */
> -  for (i = 0; i < N/2; i++)
> -    {
> -      if (out[i*6] !=  in[i*6]
> -         || out[i*6 + 1] != in[i*6 + 1]
> -         || out[i*6 + 2] != in[i*6 + 2]
> -         || out[i*6 + 3] != in[i*6 + 3]
> -         || out[i*6 + 4] != in[i*6 + 4]
> -         || out[i*6 + 5] != in[i*6 + 5]
> -         || ia[i] != i)
> -        abort ();
> -    }
> -
> -
> -  return 0;
> -}
> -
> -int main (void)
> -{
> -  check_vect ();
> -
> -  main1 ();
> -
> -  return 0;
> -}
> -
> -/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target  vect_strided  } } } */
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target  { ! { vect_strided } } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect"  { target  vect_strided  } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target { ! { vect_strided } } } } } */
> -/* { dg-final { cleanup-tree-dump "vect" } } */
> -
> Index: gcc/testsuite/gcc.dg/vect/slp-19a.c
> ===================================================================
> --- /dev/null   2011-03-23 08:42:11.268792848 +0000
> +++ gcc/testsuite/gcc.dg/vect/slp-19a.c 2011-04-12 15:18:25.000000000 +0100
> @@ -0,0 +1,61 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define N 16
> +
> +int
> +main1 ()
> +{
> +  unsigned int i;
> +  unsigned int out[N*8];
> +  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
> +  unsigned int ia[N*2];
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      out[i*8] = in[i*8];
> +      out[i*8 + 1] = in[i*8 + 1];
> +      out[i*8 + 2] = in[i*8 + 2];
> +      out[i*8 + 3] = in[i*8 + 3];
> +      out[i*8 + 4] = in[i*8 + 4];
> +      out[i*8 + 5] = in[i*8 + 5];
> +      out[i*8 + 6] = in[i*8 + 6];
> +      out[i*8 + 7] = in[i*8 + 7];
> +
> +      ia[i] = in[i*8 + 2];
> +    }
> +
> +  /* check results:  */
> +  for (i = 0; i < N; i++)
> +    {
> +      if (out[i*8] !=  in[i*8]
> +         || out[i*8 + 1] != in[i*8 + 1]
> +         || out[i*8 + 2] != in[i*8 + 2]
> +         || out[i*8 + 3] != in[i*8 + 3]
> +         || out[i*8 + 4] != in[i*8 + 4]
> +         || out[i*8 + 5] != in[i*8 + 5]
> +         || out[i*8 + 6] != in[i*8 + 6]
> +         || out[i*8 + 7] != in[i*8 + 7]
> +         || ia[i] != in[i*8 + 2])
> +       abort ();
> +    }
> +
> +  return 0;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  main1 ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> Index: gcc/testsuite/gcc.dg/vect/slp-19b.c
> ===================================================================
> --- /dev/null   2011-03-23 08:42:11.268792848 +0000
> +++ gcc/testsuite/gcc.dg/vect/slp-19b.c 2011-04-12 15:18:25.000000000 +0100
> @@ -0,0 +1,58 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define N 16
> +
> +int
> +main1 ()
> +{
> +  unsigned int i;
> +  unsigned int out[N*8];
> +  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
> +  unsigned int ia[N*2], a0, a1, a2, a3;
> +
> +  for (i = 0; i < N*2; i++)
> +    {
> +      a0 = in[i*4] + 1;
> +      a1 = in[i*4 + 1] + 2;
> +      a2 = in[i*4 + 2] + 3;
> +      a3 = in[i*4 + 3] + 4;
> +
> +      out[i*4] = a0;
> +      out[i*4 + 1] = a1;
> +      out[i*4 + 2] = a2;
> +      out[i*4 + 3] = a3;
> +
> +      ia[i] = a2;
> +    }
> +
> +  /* check results:  */
> +  for (i = 0; i < N*2; i++)
> +    {
> +      if (out[i*4] !=  in[i*4] + 1
> +         || out[i*4 + 1] != in[i*4 + 1] + 2
> +         || out[i*4 + 2] != in[i*4 + 2] + 3
> +         || out[i*4 + 3] != in[i*4 + 3] + 4
> +         || ia[i] != in[i*4 + 2] + 3)
> +        abort ();
> +    }
> +
> +  return 0;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  main1 ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> Index: gcc/testsuite/gcc.dg/vect/slp-19c.c
> ===================================================================
> --- /dev/null   2011-03-23 08:42:11.268792848 +0000
> +++ gcc/testsuite/gcc.dg/vect/slp-19c.c 2011-04-12 15:18:25.000000000 +0100
> @@ -0,0 +1,95 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define N 16
> +
> +int
> +main1 ()
> +{
> +  unsigned int i;
> +  unsigned int out[N*8];
> +  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
> +  unsigned int ia[N*2], a0, a1, a2, a3;
> +
> +  /* The last stmt requires interleaving of not power of 2 size - not
> +     vectorizable.  */
> +  for (i = 0; i < N/2; i++)
> +    {
> +      out[i*12] = in[i*12];
> +      out[i*12 + 1] = in[i*12 + 1];
> +      out[i*12 + 2] = in[i*12 + 2];
> +      out[i*12 + 3] = in[i*12 + 3];
> +      out[i*12 + 4] = in[i*12 + 4];
> +      out[i*12 + 5] = in[i*12 + 5];
> +      out[i*12 + 6] = in[i*12 + 6];
> +      out[i*12 + 7] = in[i*12 + 7];
> +      out[i*12 + 8] = in[i*12 + 8];
> +      out[i*12 + 9] = in[i*12 + 9];
> +      out[i*12 + 10] = in[i*12 + 10];
> +      out[i*12 + 11] = in[i*12 + 11];
> +
> +      ia[i] = in[i*12 + 7];
> +    }
> +
> +  /* check results:  */
> +  for (i = 0; i < N/2; i++)
> +    {
> +      if (out[i*12] !=  in[i*12]
> +         || out[i*12 + 1] != in[i*12 + 1]
> +         || out[i*12 + 2] != in[i*12 + 2]
> +         || out[i*12 + 3] != in[i*12 + 3]
> +         || out[i*12 + 4] != in[i*12 + 4]
> +         || out[i*12 + 5] != in[i*12 + 5]
> +         || out[i*12 + 6] != in[i*12 + 6]
> +         || out[i*12 + 7] != in[i*12 + 7]
> +         || out[i*12 + 8] != in[i*12 + 8]
> +         || out[i*12 + 9] != in[i*12 + 9]
> +         || out[i*12 + 10] != in[i*12 + 10]
> +         || out[i*12 + 11] != in[i*12 + 11]
> +         || ia[i] != in[i*12 + 7])
> +        abort ();
> +    }
> +
> +  /* Hybrid SLP with unrolling by 2.  */
> +  for (i = 0; i < N; i++)
> +    {
> +      out[i*6] = in[i*6];
> +      out[i*6 + 1] = in[i*6 + 1];
> +      out[i*6 + 2] = in[i*6 + 2];
> +      out[i*6 + 3] = in[i*6 + 3];
> +      out[i*6 + 4] = in[i*6 + 4];
> +      out[i*6 + 5] = in[i*6 + 5];
> +
> +      ia[i] = i;
> +    }
> +
> +  /* check results:  */
> +  for (i = 0; i < N/2; i++)
> +    {
> +      if (out[i*6] !=  in[i*6]
> +         || out[i*6 + 1] != in[i*6 + 1]
> +         || out[i*6 + 2] != in[i*6 + 2]
> +         || out[i*6 + 3] != in[i*6 + 3]
> +         || out[i*6 + 4] != in[i*6 + 4]
> +         || out[i*6 + 5] != in[i*6 + 5]
> +         || ia[i] != i)
> +        abort ();
> +    }
> +
> +  return 0;
> +}
> +
> +int main (void)
> +{
> +  check_vect ();
> +
> +  main1 ();
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
>

Patch

Index: gcc/testsuite/gcc.dg/vect/slp-11.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-11.c	2011-04-12 15:18:24.000000000 +0100
+++ /dev/null	2011-03-23 08:42:11.268792848 +0000
@@ -1,113 +0,0 @@ 
-/* { dg-require-effective-target vect_int } */
-
-#include <stdarg.h>
-#include "tree-vect.h"
-
-#define N 8 
-
-int
-main1 ()
-{
-  int i;
-  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
-  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
-  float out2[N*8];
-
-  /* Different operations - not SLPable.  */
-  for (i = 0; i < N; i++)
-    {
-      a0 = in[i*8] + 5;
-      a1 = in[i*8 + 1] * 6;
-      a2 = in[i*8 + 2] + 7;
-      a3 = in[i*8 + 3] + 8;
-      a4 = in[i*8 + 4] + 9;
-      a5 = in[i*8 + 5] + 10;
-      a6 = in[i*8 + 6] + 11;
-      a7 = in[i*8 + 7] + 12;
-
-      b0 = a0 * 3;
-      b1 = a1 * 2;
-      b2 = a2 * 12;
-      b3 = a3 * 5;
-      b4 = a4 * 8;
-      b5 = a5 * 4;
-      b6 = a6 * 3;
-      b7 = a7 * 2;
-
-      out[i*8] = b0 - 2;
-      out[i*8 + 1] = b1 - 3; 
-      out[i*8 + 2] = b2 - 2;
-      out[i*8 + 3] = b3 - 1;
-      out[i*8 + 4] = b4 - 8;
-      out[i*8 + 5] = b5 - 7;
-      out[i*8 + 6] = b6 - 3;
-      out[i*8 + 7] = b7 - 7;
-    }
-
-  /* check results:  */
-  for (i = 0; i < N; i++)
-    {
-      if (out[i*8] !=  (in[i*8] + 5) * 3 - 2
-         || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3
-         || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2
-         || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1
-         || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8
-         || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7
-         || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3
-         || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7)
-	abort ();
-    }
-
-  /* Requires permutation - not SLPable.  */
-  for (i = 0; i < N*2; i++)
-    {
-      out[i*4] = (in[i*4] + 2) * 3;
-      out[i*4 + 1] = (in[i*4 + 2] + 2) * 7;
-      out[i*4 + 2] = (in[i*4 + 1] + 7) * 3;
-      out[i*4 + 3] = (in[i*4 + 3] + 3) * 4;
-    }
-
-  /* check results:  */
-  for (i = 0; i < N*2; i++)
-    {
-      if (out[i*4] !=  (in[i*4] + 2) * 3
-         || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7
-         || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3
-         || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4)
-        abort ();
-    }
-
-  /* Different operations - not SLPable.  */
-  for (i = 0; i < N*4; i++)
-    {
-      out2[i*2] = ((float) in[i*2] * 2 + 6) ;
-      out2[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7);
-    }
-
-  /* check results:  */
-  for (i = 0; i < N*4; i++)
-    {
-      if (out2[i*2] !=  ((float) in[i*2] * 2 + 6)
-         || out2[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7))
-        abort ();
-    }
-
-
-  return 0;
-}
-
-int main (void)
-{
-  check_vect ();
-
-  main1 ();
-
-  return 0;
-}
-
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect"  { target { { vect_uintfloat_cvt && vect_strided } &&  vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  { target { { { ! vect_uintfloat_cvt } && vect_strided } &&  vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  {target  { ! { vect_int_mult && vect_strided } } } } }  */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0  "vect"  } } */
-/* { dg-final { cleanup-tree-dump "vect" } } */
-  
Index: gcc/testsuite/gcc.dg/vect/slp-11a.c
===================================================================
--- /dev/null	2011-03-23 08:42:11.268792848 +0000
+++ gcc/testsuite/gcc.dg/vect/slp-11a.c	2011-04-12 15:18:25.000000000 +0100
@@ -0,0 +1,75 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+int
+main1 ()
+{
+  int i;
+  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+  /* Different operations - not SLPable.  */
+  for (i = 0; i < N; i++)
+    {
+      a0 = in[i*8] + 5;
+      a1 = in[i*8 + 1] * 6;
+      a2 = in[i*8 + 2] + 7;
+      a3 = in[i*8 + 3] + 8;
+      a4 = in[i*8 + 4] + 9;
+      a5 = in[i*8 + 5] + 10;
+      a6 = in[i*8 + 6] + 11;
+      a7 = in[i*8 + 7] + 12;
+
+      b0 = a0 * 3;
+      b1 = a1 * 2;
+      b2 = a2 * 12;
+      b3 = a3 * 5;
+      b4 = a4 * 8;
+      b5 = a5 * 4;
+      b6 = a6 * 3;
+      b7 = a7 * 2;
+
+      out[i*8] = b0 - 2;
+      out[i*8 + 1] = b1 - 3;
+      out[i*8 + 2] = b2 - 2;
+      out[i*8 + 3] = b3 - 1;
+      out[i*8 + 4] = b4 - 8;
+      out[i*8 + 5] = b5 - 7;
+      out[i*8 + 6] = b6 - 3;
+      out[i*8 + 7] = b7 - 7;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (out[i*8] !=  (in[i*8] + 5) * 3 - 2
+         || out[i*8 + 1] != (in[i*8 + 1] * 6) * 2 - 3
+         || out[i*8 + 2] != (in[i*8 + 2] + 7) * 12 - 2
+         || out[i*8 + 3] != (in[i*8 + 3] + 8) * 5 - 1
+         || out[i*8 + 4] != (in[i*8 + 4] + 9) * 8 - 8
+         || out[i*8 + 5] != (in[i*8 + 5] + 10) * 4 - 7
+         || out[i*8 + 6] != (in[i*8 + 6] + 11) * 3 - 3
+         || out[i*8 + 7] != (in[i*8 + 7] + 12) * 2 - 7)
+	abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided && vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-11b.c
===================================================================
--- /dev/null	2011-03-23 08:42:11.268792848 +0000
+++ gcc/testsuite/gcc.dg/vect/slp-11b.c	2011-04-12 15:18:25.000000000 +0100
@@ -0,0 +1,49 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+int
+main1 ()
+{
+  int i;
+  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+
+  /* Requires permutation - not SLPable.  */
+  for (i = 0; i < N*2; i++)
+    {
+      out[i*4] = (in[i*4] + 2) * 3;
+      out[i*4 + 1] = (in[i*4 + 2] + 2) * 7;
+      out[i*4 + 2] = (in[i*4 + 1] + 7) * 3;
+      out[i*4 + 3] = (in[i*4 + 3] + 3) * 4;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N*2; i++)
+    {
+      if (out[i*4] !=  (in[i*4] + 2) * 3
+         || out[i*4 + 1] != (in[i*4 + 2] + 2) * 7
+         || out[i*4 + 2] != (in[i*4 + 1] + 7) * 3
+         || out[i*4 + 3] != (in[i*4 + 3] + 3) * 4)
+        abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided && vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-11c.c
===================================================================
--- /dev/null	2011-03-23 08:42:11.268792848 +0000
+++ gcc/testsuite/gcc.dg/vect/slp-11c.c	2011-04-12 15:18:25.000000000 +0100
@@ -0,0 +1,46 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+int
+main1 ()
+{
+  int i;
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+  float out[N*8];
+
+  /* Different operations - not SLPable.  */
+  for (i = 0; i < N*4; i++)
+    {
+      out[i*2] = ((float) in[i*2] * 2 + 6) ;
+      out[i*2 + 1] = (float) (in[i*2 + 1] * 3 + 7);
+    }
+
+  /* check results:  */
+  for (i = 0; i < N*4; i++)
+    {
+      if (out[i*2] !=  ((float) in[i*2] * 2 + 6)
+         || out[i*2 + 1] != (float) (in[i*2 + 1] * 3 + 7))
+        abort ();
+    }
+
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { vect_uintfloat_cvt && vect_strided } && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { { vect_uintfloat_cvt && vect_strided } && vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0  "vect"  } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-12a.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-12a.c	2011-04-12 15:18:24.000000000 +0100
+++ gcc/testsuite/gcc.dg/vect/slp-12a.c	2011-04-12 15:18:25.000000000 +0100
@@ -11,7 +11,7 @@  main1 ()
   int i;
   unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
   unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
-  unsigned int ia[N], ib[N*2];
+  unsigned int ia[N];
 
   for (i = 0; i < N; i++)
     {
@@ -61,27 +61,6 @@  main1 ()
 	abort ();
     }
 
-  for (i = 0; i < N*2; i++)
-    {
-      out[i*4] = (in[i*4] + 2) * 3;
-      out[i*4 + 1] = (in[i*4 + 1] + 2) * 7;
-      out[i*4 + 2] = (in[i*4 + 2] + 7) * 3;
-      out[i*4 + 3] = (in[i*4 + 3] + 7) * 7;
-
-      ib[i] = 7;
-    }
-
-  /* check results:  */
-  for (i = 0; i < N*2; i++)
-    {
-      if (out[i*4] !=  (in[i*4] + 2) * 3
-         || out[i*4 + 1] != (in[i*4 + 1] + 2) * 7
-         || out[i*4 + 2] != (in[i*4 + 2] + 7) * 3
-         || out[i*4 + 3] != (in[i*4 + 3] + 7) * 7 
-         || ib[i] != 7)
-        abort ();
-    }
-
   return 0;
 }
 
@@ -94,11 +73,8 @@  int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect"  {target { vect_strided && vect_int_mult} } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  {target { {! {vect_strided}} && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  {target  { ! vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {target { vect_strided && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {target { {! {vect_strided}} && vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {target  { ! vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided && vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! { vect_strided && vect_int_mult } } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
-  
Index: gcc/testsuite/gcc.dg/vect/slp-12c.c
===================================================================
--- /dev/null	2011-03-23 08:42:11.268792848 +0000
+++ gcc/testsuite/gcc.dg/vect/slp-12c.c	2011-04-12 15:18:25.000000000 +0100
@@ -0,0 +1,53 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 8
+
+int
+main1 ()
+{
+  int i;
+  unsigned int out[N*8], a0, a1, a2, a3, a4, a5, a6, a7, b1, b0, b2, b3, b4, b5, b6, b7;
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+  unsigned int ia[N*2];
+
+  for (i = 0; i < N*2; i++)
+    {
+      out[i*4] = (in[i*4] + 2) * 3;
+      out[i*4 + 1] = (in[i*4 + 1] + 2) * 7;
+      out[i*4 + 2] = (in[i*4 + 2] + 7) * 3;
+      out[i*4 + 3] = (in[i*4 + 3] + 7) * 7;
+
+      ia[i] = 7;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N*2; i++)
+    {
+      if (out[i*4] !=  (in[i*4] + 2) * 3
+         || out[i*4 + 1] != (in[i*4 + 1] + 2) * 7
+         || out[i*4 + 2] != (in[i*4 + 2] + 7) * 3
+         || out[i*4 + 3] != (in[i*4 + 3] + 7) * 7
+         || ia[i] != 7)
+        abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"  { target { ! vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_int_mult } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_int_mult } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-19.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-19.c	2011-04-12 15:18:24.000000000 +0100
+++ /dev/null	2011-03-23 08:42:11.268792848 +0000
@@ -1,154 +0,0 @@ 
-/* { dg-require-effective-target vect_int } */
-
-#include <stdarg.h>
-#include "tree-vect.h"
-
-#define N 16 
-
-int
-main1 ()
-{
-  unsigned int i;
-  unsigned int out[N*8];
-  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
-  unsigned int ia[N*2], a0, a1, a2, a3;
-
-  for (i = 0; i < N; i++)
-    {
-      out[i*8] = in[i*8];
-      out[i*8 + 1] = in[i*8 + 1];
-      out[i*8 + 2] = in[i*8 + 2];
-      out[i*8 + 3] = in[i*8 + 3];
-      out[i*8 + 4] = in[i*8 + 4];
-      out[i*8 + 5] = in[i*8 + 5];
-      out[i*8 + 6] = in[i*8 + 6];
-      out[i*8 + 7] = in[i*8 + 7];
-    
-      ia[i] = in[i*8 + 2];
-    }
-
-  /* check results:  */
-  for (i = 0; i < N; i++)
-    {
-      if (out[i*8] !=  in[i*8]
-         || out[i*8 + 1] != in[i*8 + 1]
-         || out[i*8 + 2] != in[i*8 + 2]
-         || out[i*8 + 3] != in[i*8 + 3]
-         || out[i*8 + 4] != in[i*8 + 4]
-         || out[i*8 + 5] != in[i*8 + 5]
-         || out[i*8 + 6] != in[i*8 + 6]
-         || out[i*8 + 7] != in[i*8 + 7]
-         || ia[i] != in[i*8 + 2])
-	abort ();
-    }
-
-  for (i = 0; i < N*2; i++)
-    {
-      a0 = in[i*4] + 1;
-      a1 = in[i*4 + 1] + 2;
-      a2 = in[i*4 + 2] + 3;
-      a3 = in[i*4 + 3] + 4;
-
-      out[i*4] = a0;
-      out[i*4 + 1] = a1;
-      out[i*4 + 2] = a2;
-      out[i*4 + 3] = a3;
-
-      ia[i] = a2;
-    }
-
-  /* check results:  */
-  for (i = 0; i < N*2; i++)
-    {
-      if (out[i*4] !=  in[i*4] + 1
-         || out[i*4 + 1] != in[i*4 + 1] + 2
-         || out[i*4 + 2] != in[i*4 + 2] + 3
-         || out[i*4 + 3] != in[i*4 + 3] + 4
-         || ia[i] != in[i*4 + 2] + 3)
-        abort ();
-    }
-
-  /* The last stmt requires interleaving of not power of 2 size - not 
-     vectorizable.  */
-  for (i = 0; i < N/2; i++)
-    {
-      out[i*12] = in[i*12];
-      out[i*12 + 1] = in[i*12 + 1];
-      out[i*12 + 2] = in[i*12 + 2];
-      out[i*12 + 3] = in[i*12 + 3];
-      out[i*12 + 4] = in[i*12 + 4];
-      out[i*12 + 5] = in[i*12 + 5];
-      out[i*12 + 6] = in[i*12 + 6];
-      out[i*12 + 7] = in[i*12 + 7];
-      out[i*12 + 8] = in[i*12 + 8];
-      out[i*12 + 9] = in[i*12 + 9];
-      out[i*12 + 10] = in[i*12 + 10];
-      out[i*12 + 11] = in[i*12 + 11];
-
-      ia[i] = in[i*12 + 7];
-    }
-
-  /* check results:  */
-  for (i = 0; i < N/2; i++)
-    {
-      if (out[i*12] !=  in[i*12]
-         || out[i*12 + 1] != in[i*12 + 1]
-         || out[i*12 + 2] != in[i*12 + 2]
-         || out[i*12 + 3] != in[i*12 + 3]
-         || out[i*12 + 4] != in[i*12 + 4]
-         || out[i*12 + 5] != in[i*12 + 5]
-         || out[i*12 + 6] != in[i*12 + 6]
-         || out[i*12 + 7] != in[i*12 + 7]
-         || out[i*12 + 8] != in[i*12 + 8]
-         || out[i*12 + 9] != in[i*12 + 9]
-         || out[i*12 + 10] != in[i*12 + 10]
-         || out[i*12 + 11] != in[i*12 + 11]
-         || ia[i] != in[i*12 + 7])
-        abort ();
-    }
-
-  /* Hybrid SLP with unrolling by 2.  */
-  for (i = 0; i < N; i++)
-    {
-      out[i*6] = in[i*6];
-      out[i*6 + 1] = in[i*6 + 1];
-      out[i*6 + 2] = in[i*6 + 2];
-      out[i*6 + 3] = in[i*6 + 3];
-      out[i*6 + 4] = in[i*6 + 4];
-      out[i*6 + 5] = in[i*6 + 5];
-    
-      ia[i] = i;
-    } 
-    
-  /* check results:  */
-  for (i = 0; i < N/2; i++)
-    {
-      if (out[i*6] !=  in[i*6]
-         || out[i*6 + 1] != in[i*6 + 1]
-         || out[i*6 + 2] != in[i*6 + 2]
-         || out[i*6 + 3] != in[i*6 + 3]
-         || out[i*6 + 4] != in[i*6 + 4]
-         || out[i*6 + 5] != in[i*6 + 5]
-         || ia[i] != i)
-        abort ();
-    }
-
-
-  return 0;
-}
-
-int main (void)
-{
-  check_vect ();
-
-  main1 ();
-
-  return 0;
-}
-
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target  vect_strided  } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target  { ! { vect_strided } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect"  { target  vect_strided  } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { target { ! { vect_strided } } } } } */
-/* { dg-final { cleanup-tree-dump "vect" } } */
-  
Index: gcc/testsuite/gcc.dg/vect/slp-19a.c
===================================================================
--- /dev/null	2011-03-23 08:42:11.268792848 +0000
+++ gcc/testsuite/gcc.dg/vect/slp-19a.c	2011-04-12 15:18:25.000000000 +0100
@@ -0,0 +1,61 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int
+main1 ()
+{
+  unsigned int i;
+  unsigned int out[N*8];
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+  unsigned int ia[N*2];
+
+  for (i = 0; i < N; i++)
+    {
+      out[i*8] = in[i*8];
+      out[i*8 + 1] = in[i*8 + 1];
+      out[i*8 + 2] = in[i*8 + 2];
+      out[i*8 + 3] = in[i*8 + 3];
+      out[i*8 + 4] = in[i*8 + 4];
+      out[i*8 + 5] = in[i*8 + 5];
+      out[i*8 + 6] = in[i*8 + 6];
+      out[i*8 + 7] = in[i*8 + 7];
+
+      ia[i] = in[i*8 + 2];
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (out[i*8] !=  in[i*8]
+         || out[i*8 + 1] != in[i*8 + 1]
+         || out[i*8 + 2] != in[i*8 + 2]
+         || out[i*8 + 3] != in[i*8 + 3]
+         || out[i*8 + 4] != in[i*8 + 4]
+         || out[i*8 + 5] != in[i*8 + 5]
+         || out[i*8 + 6] != in[i*8 + 6]
+         || out[i*8 + 7] != in[i*8 + 7]
+         || ia[i] != in[i*8 + 2])
+	abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-19b.c
===================================================================
--- /dev/null	2011-03-23 08:42:11.268792848 +0000
+++ gcc/testsuite/gcc.dg/vect/slp-19b.c	2011-04-12 15:18:25.000000000 +0100
@@ -0,0 +1,58 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int
+main1 ()
+{
+  unsigned int i;
+  unsigned int out[N*8];
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+  unsigned int ia[N*2], a0, a1, a2, a3;
+
+  for (i = 0; i < N*2; i++)
+    {
+      a0 = in[i*4] + 1;
+      a1 = in[i*4 + 1] + 2;
+      a2 = in[i*4 + 2] + 3;
+      a3 = in[i*4 + 3] + 4;
+
+      out[i*4] = a0;
+      out[i*4 + 1] = a1;
+      out[i*4 + 2] = a2;
+      out[i*4 + 3] = a3;
+
+      ia[i] = a2;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N*2; i++)
+    {
+      if (out[i*4] !=  in[i*4] + 1
+         || out[i*4 + 1] != in[i*4 + 1] + 2
+         || out[i*4 + 2] != in[i*4 + 2] + 3
+         || out[i*4 + 3] != in[i*4 + 3] + 4
+         || ia[i] != in[i*4 + 2] + 3)
+        abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_strided } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_strided } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_strided } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { ! vect_strided } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: gcc/testsuite/gcc.dg/vect/slp-19c.c
===================================================================
--- /dev/null	2011-03-23 08:42:11.268792848 +0000
+++ gcc/testsuite/gcc.dg/vect/slp-19c.c	2011-04-12 15:18:25.000000000 +0100
@@ -0,0 +1,95 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int
+main1 ()
+{
+  unsigned int i;
+  unsigned int out[N*8];
+  unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+  unsigned int ia[N*2], a0, a1, a2, a3;
+
+  /* The last stmt requires interleaving with a group size that is not a
+     power of 2 - not vectorizable.  */
+  for (i = 0; i < N/2; i++)
+    {
+      out[i*12] = in[i*12];
+      out[i*12 + 1] = in[i*12 + 1];
+      out[i*12 + 2] = in[i*12 + 2];
+      out[i*12 + 3] = in[i*12 + 3];
+      out[i*12 + 4] = in[i*12 + 4];
+      out[i*12 + 5] = in[i*12 + 5];
+      out[i*12 + 6] = in[i*12 + 6];
+      out[i*12 + 7] = in[i*12 + 7];
+      out[i*12 + 8] = in[i*12 + 8];
+      out[i*12 + 9] = in[i*12 + 9];
+      out[i*12 + 10] = in[i*12 + 10];
+      out[i*12 + 11] = in[i*12 + 11];
+
+      ia[i] = in[i*12 + 7];
+    }
+
+  /* check results:  */
+  for (i = 0; i < N/2; i++)
+    {
+      if (out[i*12] !=  in[i*12]
+         || out[i*12 + 1] != in[i*12 + 1]
+         || out[i*12 + 2] != in[i*12 + 2]
+         || out[i*12 + 3] != in[i*12 + 3]
+         || out[i*12 + 4] != in[i*12 + 4]
+         || out[i*12 + 5] != in[i*12 + 5]
+         || out[i*12 + 6] != in[i*12 + 6]
+         || out[i*12 + 7] != in[i*12 + 7]
+         || out[i*12 + 8] != in[i*12 + 8]
+         || out[i*12 + 9] != in[i*12 + 9]
+         || out[i*12 + 10] != in[i*12 + 10]
+         || out[i*12 + 11] != in[i*12 + 11]
+         || ia[i] != in[i*12 + 7])
+        abort ();
+    }
+
+  /* Hybrid SLP with unrolling by 2.  */
+  for (i = 0; i < N; i++)
+    {
+      out[i*6] = in[i*6];
+      out[i*6 + 1] = in[i*6 + 1];
+      out[i*6 + 2] = in[i*6 + 2];
+      out[i*6 + 3] = in[i*6 + 3];
+      out[i*6 + 4] = in[i*6 + 4];
+      out[i*6 + 5] = in[i*6 + 5];
+
+      ia[i] = i;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N/2; i++)
+    {
+      if (out[i*6] !=  in[i*6]
+         || out[i*6 + 1] != in[i*6 + 1]
+         || out[i*6 + 2] != in[i*6 + 2]
+         || out[i*6 + 3] != in[i*6 + 3]
+         || out[i*6 + 4] != in[i*6 + 4]
+         || out[i*6 + 5] != in[i*6 + 5]
+         || ia[i] != i)
+        abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */