diff mbox

[vec-tails,10/10] Tests

Message ID 20160705154436.GA51351@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Enkovich July 5, 2016, 3:44 p.m. UTC
Hi,

This patch adds several tests to check tails vectorization functionality.

Thanks,
Ilya
--
gcc/testsuite/

2016-07-05  Ilya Enkovich  <ilya.enkovich@intel.com>

	* lib/target-supports.exp (check_avx2_hw_available): New.
	(check_effective_target_avx2_runtime): New.
	* gcc.dg/vect/vect-tail-combine-1.c: New test.
	* gcc.dg/vect/vect-tail-combine-2.c: New test.
	* gcc.dg/vect/vect-tail-combine-3.c: New test.
	* gcc.dg/vect/vect-tail-combine-4.c: New test.
	* gcc.dg/vect/vect-tail-combine-5.c: New test.
	* gcc.dg/vect/vect-tail-combine-6.c: New test.
	* gcc.dg/vect/vect-tail-combine-7.c: New test.
	* gcc.dg/vect/vect-tail-combine-9.c: New test.
	* gcc.dg/vect/vect-tail-mask-1.c: New test.
	* gcc.dg/vect/vect-tail-mask-2.c: New test.
	* gcc.dg/vect/vect-tail-mask-3.c: New test.
	* gcc.dg/vect/vect-tail-mask-4.c: New test.
	* gcc.dg/vect/vect-tail-mask-5.c: New test.
	* gcc.dg/vect/vect-tail-mask-6.c: New test.
	* gcc.dg/vect/vect-tail-mask-7.c: New test.
	* gcc.dg/vect/vect-tail-mask-8.c: New test.
	* gcc.dg/vect/vect-tail-mask-9.c: New test.
	* gcc.dg/vect/vect-tail-nomask-1.c: New test.
	* gcc.dg/vect/vect-tail-nomask-2.c: New test.
	* gcc.dg/vect/vect-tail-nomask-3.c: New test.
	* gcc.dg/vect/vect-tail-nomask-4.c: New test.
	* gcc.dg/vect/vect-tail-nomask-5.c: New test.
	* gcc.dg/vect/vect-tail-nomask-6.c: New test.
	* gcc.dg/vect/vect-tail-nomask-7.c: New test.

Comments

Jeff Law July 14, 2016, 5:32 p.m. UTC | #1
On 07/05/2016 09:44 AM, Ilya Enkovich wrote:
> Hi,
>
> This patch adds several tests to check tails vectorization functionality.
>
> Thanks,
> Ilya
> --
> gcc/testsuite/
>
> 2016-07-05  Ilya Enkovich  <ilya.enkovich@intel.com>
>
> 	* lib/target-supports.exp (check_avx2_hw_available): New.
> 	(check_effective_target_avx2_runtime): New.
> 	* gcc.dg/vect/vect-tail-combine-1.c: New test.
> 	* gcc.dg/vect/vect-tail-combine-2.c: New test.
> 	* gcc.dg/vect/vect-tail-combine-3.c: New test.
> 	* gcc.dg/vect/vect-tail-combine-4.c: New test.
> 	* gcc.dg/vect/vect-tail-combine-5.c: New test.
> 	* gcc.dg/vect/vect-tail-combine-6.c: New test.
> 	* gcc.dg/vect/vect-tail-combine-7.c: New test.
> 	* gcc.dg/vect/vect-tail-combine-9.c: New test.
> 	* gcc.dg/vect/vect-tail-mask-1.c: New test.
> 	* gcc.dg/vect/vect-tail-mask-2.c: New test.
> 	* gcc.dg/vect/vect-tail-mask-3.c: New test.
> 	* gcc.dg/vect/vect-tail-mask-4.c: New test.
> 	* gcc.dg/vect/vect-tail-mask-5.c: New test.
> 	* gcc.dg/vect/vect-tail-mask-6.c: New test.
> 	* gcc.dg/vect/vect-tail-mask-7.c: New test.
> 	* gcc.dg/vect/vect-tail-mask-8.c: New test.
> 	* gcc.dg/vect/vect-tail-mask-9.c: New test.
> 	* gcc.dg/vect/vect-tail-nomask-1.c: New test.
> 	* gcc.dg/vect/vect-tail-nomask-2.c: New test.
> 	* gcc.dg/vect/vect-tail-nomask-3.c: New test.
> 	* gcc.dg/vect/vect-tail-nomask-4.c: New test.
> 	* gcc.dg/vect/vect-tail-nomask-5.c: New test.
> 	* gcc.dg/vect/vect-tail-nomask-6.c: New test.
> 	* gcc.dg/vect/vect-tail-nomask-7.c: New test.
This is fine when the rest of the patches go in.


> +		  unsigned int eax, ebx, ecx, edx;
> +		  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)
> +		      || ((ecx & bit_OSXSAVE) != bit_OSXSAVE))
> +		    return 1;
> +
> +		  if (__get_cpuid_max (0, NULL) < 7)
> +		    return 1;
> +
> +		  __cpuid_count (7, 0, eax, ebx, ecx, edx);
> +
> +		  return (ebx & bit_AVX2) != bit_AVX2;
Ugh.  I'm going to trust this is correct.  I vaguely recall mucking 
around with this stuff for the original AVX in glibc several years ago.

jeff
Ilya Enkovich July 15, 2016, 9:39 a.m. UTC | #2
2016-07-14 20:32 GMT+03:00 Jeff Law <law@redhat.com>:
> On 07/05/2016 09:44 AM, Ilya Enkovich wrote:
>>
>> Hi,
>>
>> This patch adds several tests to check tails vectorization functionality.
>>
>> Thanks,
>> Ilya
>> --
>> gcc/testsuite/
>>
>> 2016-07-05  Ilya Enkovich  <ilya.enkovich@intel.com>
>>
>>         * lib/target-supports.exp (check_avx2_hw_available): New.
>>         (check_effective_target_avx2_runtime): New.
>>         * gcc.dg/vect/vect-tail-combine-1.c: New test.
>>         * gcc.dg/vect/vect-tail-combine-2.c: New test.
>>         * gcc.dg/vect/vect-tail-combine-3.c: New test.
>>         * gcc.dg/vect/vect-tail-combine-4.c: New test.
>>         * gcc.dg/vect/vect-tail-combine-5.c: New test.
>>         * gcc.dg/vect/vect-tail-combine-6.c: New test.
>>         * gcc.dg/vect/vect-tail-combine-7.c: New test.
>>         * gcc.dg/vect/vect-tail-combine-9.c: New test.
>>         * gcc.dg/vect/vect-tail-mask-1.c: New test.
>>         * gcc.dg/vect/vect-tail-mask-2.c: New test.
>>         * gcc.dg/vect/vect-tail-mask-3.c: New test.
>>         * gcc.dg/vect/vect-tail-mask-4.c: New test.
>>         * gcc.dg/vect/vect-tail-mask-5.c: New test.
>>         * gcc.dg/vect/vect-tail-mask-6.c: New test.
>>         * gcc.dg/vect/vect-tail-mask-7.c: New test.
>>         * gcc.dg/vect/vect-tail-mask-8.c: New test.
>>         * gcc.dg/vect/vect-tail-mask-9.c: New test.
>>         * gcc.dg/vect/vect-tail-nomask-1.c: New test.
>>         * gcc.dg/vect/vect-tail-nomask-2.c: New test.
>>         * gcc.dg/vect/vect-tail-nomask-3.c: New test.
>>         * gcc.dg/vect/vect-tail-nomask-4.c: New test.
>>         * gcc.dg/vect/vect-tail-nomask-5.c: New test.
>>         * gcc.dg/vect/vect-tail-nomask-6.c: New test.
>>         * gcc.dg/vect/vect-tail-nomask-7.c: New test.
>
> This is fine when the rest of the patches go in.
>
>
>> +                 unsigned int eax, ebx, ecx, edx;
>> +                 if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)
>> +                     || ((ecx & bit_OSXSAVE) != bit_OSXSAVE))
>> +                   return 1;
>> +
>> +                 if (__get_cpuid_max (0, NULL) < 7)
>> +                   return 1;
>> +
>> +                 __cpuid_count (7, 0, eax, ebx, ecx, edx);
>> +
>> +                 return (ebx & bit_AVX2) != bit_AVX2;
>
> Ugh.  I'm going to trust this is correct.  I vaguely recall mucking around
> with this stuff for the original AVX in glibc several years ago.

Actually I just copied some code from avx2-check.h.  Kirill should be able
to review this piece of code.

Thanks,
Ilya

>
> jeff
>
Kirill Yukhin July 19, 2016, 11:56 a.m. UTC | #3
Hi!
On 15 Jul 12:39, Ilya Enkovich wrote:
> 2016-07-14 20:32 GMT+03:00 Jeff Law <law@redhat.com>:
> > On 07/05/2016 09:44 AM, Ilya Enkovich wrote:
> >>
> >> Hi,
> >>
> >> This patch adds several tests to check tails vectorization functionality.
> >>
> >> Thanks,
> >> Ilya
> >> --
> >> gcc/testsuite/
> >>
> >> 2016-07-05  Ilya Enkovich  <ilya.enkovich@intel.com>
> >>
> >>         * lib/target-supports.exp (check_avx2_hw_available): New.
> >>         (check_effective_target_avx2_runtime): New.
> >>         * gcc.dg/vect/vect-tail-combine-1.c: New test.
> >>         * gcc.dg/vect/vect-tail-combine-2.c: New test.
> >>         * gcc.dg/vect/vect-tail-combine-3.c: New test.
> >>         * gcc.dg/vect/vect-tail-combine-4.c: New test.
> >>         * gcc.dg/vect/vect-tail-combine-5.c: New test.
> >>         * gcc.dg/vect/vect-tail-combine-6.c: New test.
> >>         * gcc.dg/vect/vect-tail-combine-7.c: New test.
> >>         * gcc.dg/vect/vect-tail-combine-9.c: New test.
> >>         * gcc.dg/vect/vect-tail-mask-1.c: New test.
> >>         * gcc.dg/vect/vect-tail-mask-2.c: New test.
> >>         * gcc.dg/vect/vect-tail-mask-3.c: New test.
> >>         * gcc.dg/vect/vect-tail-mask-4.c: New test.
> >>         * gcc.dg/vect/vect-tail-mask-5.c: New test.
> >>         * gcc.dg/vect/vect-tail-mask-6.c: New test.
> >>         * gcc.dg/vect/vect-tail-mask-7.c: New test.
> >>         * gcc.dg/vect/vect-tail-mask-8.c: New test.
> >>         * gcc.dg/vect/vect-tail-mask-9.c: New test.
> >>         * gcc.dg/vect/vect-tail-nomask-1.c: New test.
> >>         * gcc.dg/vect/vect-tail-nomask-2.c: New test.
> >>         * gcc.dg/vect/vect-tail-nomask-3.c: New test.
> >>         * gcc.dg/vect/vect-tail-nomask-4.c: New test.
> >>         * gcc.dg/vect/vect-tail-nomask-5.c: New test.
> >>         * gcc.dg/vect/vect-tail-nomask-6.c: New test.
> >>         * gcc.dg/vect/vect-tail-nomask-7.c: New test.
> >
> > This is fine when the rest of the patches go in.
> >
> >
> >> +                 unsigned int eax, ebx, ecx, edx;
> >> +                 if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)
> >> +                     || ((ecx & bit_OSXSAVE) != bit_OSXSAVE))
> >> +                   return 1;
> >> +
> >> +                 if (__get_cpuid_max (0, NULL) < 7)
> >> +                   return 1;
> >> +
> >> +                 __cpuid_count (7, 0, eax, ebx, ecx, edx);
> >> +
> >> +                 return (ebx & bit_AVX2) != bit_AVX2;
> >
> > Ugh.  I'm going to trust this is correct.  I vaguely recall mucking around
> > with this stuff for the original AVX in glibc several years ago.
> 
> Actually I just copied some code from avx2-check.h.  Kirill should be able
> to review this piece of code.

LGTM.

--
Thanks, K
> 
> Thanks,
> Ilya
> 
> >
> > jeff
> >
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-1.c
new file mode 100644
index 0000000..134d789
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-1.c
@@ -0,0 +1,106 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+void __attribute__((noinline))
+test_citer (int * __restrict__ a,
+	    int * __restrict__ b,
+	    int * __restrict__ c)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline))
+test_viter (int * __restrict__ a,
+	    int * __restrict__ b,
+	    int * __restrict__ c,
+	    int size)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+	   int * __restrict__ b,
+	   int * __restrict__ c,
+	   int size)
+{
+  for (int i = 0; i < size; i++)
+    {
+      a[i] = i;
+      b[i] = -i;
+      c[i] = 0;
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  int *b;
+  int *c;
+  int i;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  test_citer (a, b, c);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  test_viter (a, b, c, SIZE);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-2.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-2.c
new file mode 100644
index 0000000..c513c5c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-2.c
@@ -0,0 +1,134 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+void __attribute__((noinline))
+test_citer (int * __restrict__ a,
+	    int * __restrict__ b,
+	    int * __restrict__ c)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    if (a[i] > 0)
+      b[i] = a[i] + c[i];
+}
+
+void __attribute__((noinline))
+test_viter (int * __restrict__ a,
+	    int * __restrict__ b,
+	    int * __restrict__ c,
+	    int size)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    if (a[i] > 0)
+      b[i] = a[i] + c[i];
+}
+
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+	   int * __restrict__ b,
+	   int * __restrict__ c,
+	   int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      if (i % 2)
+	{
+	  a[i] = i;
+	  b[i] = 0;
+	  c[i] = 2 * i;
+	}
+      else
+	{
+	  a[i] = -i;
+	  b[i] = i;
+	  c[i] = 0;
+	}
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  int *b;
+  int *c;
+  int i;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  test_citer (a, b, c);
+  for (i = 0; i < SIZE; i++)
+    if (a[i] > 0)
+      {
+	if (b[i] != a[i] + c[i])
+	  __builtin_abort ();
+      }
+    else
+      {
+	if (b[i] != i)
+	  __builtin_abort ();
+      }
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  test_viter (a, b, c, SIZE);
+  for (i = 0; i < SIZE; i++)
+    if (a[i] > 0)
+      {
+	if (b[i] != a[i] + c[i])
+	  __builtin_abort ();
+      }
+    else
+      {
+	if (b[i] != i)
+	  __builtin_abort ();
+      }
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-3.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-3.c
new file mode 100644
index 0000000..17c5e95
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-3.c
@@ -0,0 +1,111 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+int __attribute__((noinline))
+test_citer (int * __restrict__ a,
+	    int * __restrict__ b,
+	    int * __restrict__ c)
+{
+  int res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    res += a[i] + b[i] * c[i];
+
+  return res;
+}
+
+int __attribute__((noinline))
+test_viter (int * __restrict__ a,
+	    int * __restrict__ b,
+	    int * __restrict__ c,
+	    int size)
+{
+  int res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    res += a[i] + b[i] * c[i];
+
+  return res;
+}
+
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+	   int * __restrict__ b,
+	   int * __restrict__ c,
+	   int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      a[i] = i;
+      b[i] = -i;
+      c[i] = 1;
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  int *b;
+  int *c;
+  int res;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  res = test_citer (a, b, c);
+  if (res != 0)
+    __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  res = test_viter (a, b, c, SIZE);
+  if (res != 0)
+    __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-4.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-4.c
new file mode 100644
index 0000000..854c1ab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-4.c
@@ -0,0 +1,122 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+int __attribute__((noinline))
+test_citer (int * __restrict__ a,
+	    int * __restrict__ b,
+	    int * __restrict__ c)
+{
+  int res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    if (a[i] > 0)
+      res += b[i] + c[i];
+
+  return res;
+}
+
+int __attribute__((noinline))
+test_viter (int * __restrict__ a,
+	    int * __restrict__ b,
+	    int * __restrict__ c,
+	    int size)
+{
+  int res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    if (a[i] > 0)
+      res += b[i] + c[i];
+
+  return res;
+}
+
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+	   int * __restrict__ b,
+	   int * __restrict__ c,
+	   int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      if (i % 2)
+	{
+	  a[i] = i;
+	  b[i] = -i*2;
+	  c[i] = i*2;
+	}
+      else
+	{
+	  a[i] = -i;
+	  b[i] = i;
+	  c[i] = 10;
+	}
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  int *b;
+  int *c;
+  int res;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  res = test_citer (a, b, c);
+  if (res != 0)
+    __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  res = test_viter (a, b, c, SIZE);
+  if (res != 0)
+    __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-5.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-5.c
new file mode 100644
index 0000000..9589715
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-5.c
@@ -0,0 +1,107 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+void __attribute__((noinline))
+test_citer (int * __restrict__ a,
+	    int * __restrict__ b,
+	    int * __restrict__ c)
+{
+  long long i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline))
+test_viter (int * __restrict__ a,
+	    int * __restrict__ b,
+	    int * __restrict__ c,
+	    int size)
+{
+  long long i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+	   int * __restrict__ b,
+	   int * __restrict__ c,
+	   int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      a[i] = i;
+      b[i] = -i;
+      c[i] = 0;
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  int *b;
+  int *c;
+  long long i;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  test_citer (a, b, c);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  test_viter (a, b, c, SIZE);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-6.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-6.c
new file mode 100644
index 0000000..284b2aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-6.c
@@ -0,0 +1,107 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+void __attribute__((noinline))
+test_citer (long long * __restrict__ a,
+	    long long * __restrict__ b,
+	    long long * __restrict__ c)
+{
+  int i;
+
+  a = (long long *)__builtin_assume_aligned (a, ALIGN);
+  b = (long long *)__builtin_assume_aligned (b, ALIGN);
+  c = (long long *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline))
+test_viter (long long * __restrict__ a,
+	    long long * __restrict__ b,
+	    long long * __restrict__ c,
+	    int size)
+{
+  int i;
+
+  a = (long long *)__builtin_assume_aligned (a, ALIGN);
+  b = (long long *)__builtin_assume_aligned (b, ALIGN);
+  c = (long long *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    c[i] = a[i] + b[i];
+}
+
+void __attribute__((noinline))
+init_data (long long * __restrict__ a,
+	   long long * __restrict__ b,
+	   long long * __restrict__ c,
+	   int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      a[i] = i;
+      b[i] = -i;
+      c[i] = 0;
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+void __attribute__((noinline))
+run_test ()
+{
+  long long *a;
+  long long *b;
+  long long *c;
+  int i;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  test_citer (a, b, c);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  test_viter (a, b, c, SIZE);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-7.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-7.c
new file mode 100644
index 0000000..b328285
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-7.c
@@ -0,0 +1,155 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#define SIZE 1023
+#define ALIGN 64
+
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+double __attribute__((noinline))
+test_citer (int * __restrict__ a,
+	    long long * __restrict__ b,
+	    float * __restrict__ c,
+	    double * __restrict__ d)
+{
+  double res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (long long *)__builtin_assume_aligned (b, ALIGN);
+  c = (float *)__builtin_assume_aligned (c, ALIGN);
+  d = (double *)__builtin_assume_aligned (d, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    {
+      a[i] = c[i] + 1;
+      if (b[i] < 0)
+	res += d[i];
+    }
+
+  return res;
+}
+
+double __attribute__((noinline))
+test_viter (int * __restrict__ a,
+	    long long * __restrict__ b,
+	    float * __restrict__ c,
+	    double * __restrict__ d,
+	    int size)
+{
+  double res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (long long *)__builtin_assume_aligned (b, ALIGN);
+  c = (float *)__builtin_assume_aligned (c, ALIGN);
+  d = (double *)__builtin_assume_aligned (d, ALIGN);
+
+  for (i = 0; i < size; i++)
+    {
+      a[i] = c[i] + 1;
+      if (b[i] < 0)
+	res += d[i];
+    }
+
+  return res;
+}
+
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+	   long long * __restrict__ b,
+	   float * __restrict__ c,
+	   double * __restrict__ d,
+	   int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      if (i % 2)
+	{
+	  a[i] = 0;
+	  b[i] = i;
+	  c[i] = 2.5;
+	  d[i] = 1;
+	}
+      else
+	{
+	  a[i] = 0;
+	  b[i] = -i;
+	  c[i] = 2.5;
+	  d[i] = -1;
+	}
+      asm volatile("": : :"memory");
+    }
+  a[size] = (int)size;
+  b[size] = (long long)size;
+  c[size] = (float)size;
+  d[size] = (double)size;
+}
+
+/* Allocate the arrays, run both kernels, and abort on any wrong result or
+   on a change to the extra element stored at index SIZE of each array.  */
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  long long *b;
+  float *c;
+  double *d;
+  double res;
+  int i;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (float)) != 0)
+    return;
+  if (posix_memalign ((void **)&d, ALIGN, (SIZE + 1) * sizeof (double)) != 0)
+    return;
+
+  init_data (a, b, c, d, SIZE);
+  res = test_citer (a, b, c, d);
+  res += SIZE / 2;
+  if (res > 0.01 || res < -0.01)
+    __builtin_abort ();
+  for (i = 0; i < SIZE; i++)
+    if (a[i] != 3)
+      __builtin_abort ();
+  if (a[SIZE] != (int)SIZE || b[SIZE] != (long long)SIZE
+      || c[SIZE] != (float)SIZE || d[SIZE] != (double)SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, d, SIZE);
+  res = test_viter (a, b, c, d, SIZE);
+  res += SIZE / 2;
+  if (res > 0.01 || res < -0.01)
+    __builtin_abort ();
+  for (i = 0; i < SIZE; i++)
+    if (a[i] != 3)
+      __builtin_abort ();
+  if (a[SIZE] != (int)SIZE || b[SIZE] != (long long)SIZE
+      || c[SIZE] != (float)SIZE || d[SIZE] != (double)SIZE)
+    __builtin_abort ();
+
+  /* Release all four buffers.  */
+  free (a);
+  free (b);
+  free (c);
+  free (d);
+}
+
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-9.c b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-9.c
new file mode 100644
index 0000000..221835a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-9.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */
+
+#define SIZE 33
+#define ALIGN 64
+
+void
+test (int * __restrict__ a,
+      int * __restrict__ b,
+      int * __restrict__ c)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "LOOP EPILOGUE COMBINED \\(VS=64\\)" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-1.c
new file mode 100644
index 0000000..7f50a17
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-1.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-1.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-2.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-2.c
new file mode 100644
index 0000000..995631c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-2.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-2.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-3.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-3.c
new file mode 100644
index 0000000..fe405bf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-3.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-3.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-4.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-4.c
new file mode 100644
index 0000000..3bbd054
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-4.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-4.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-5.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-5.c
new file mode 100644
index 0000000..7541061
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-5.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-5.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-6.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-6.c
new file mode 100644
index 0000000..2af1c6a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-6.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-6.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-7.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-7.c
new file mode 100644
index 0000000..72f3119
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-7.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-7.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-8.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-8.c
new file mode 100644
index 0000000..552e974
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-8.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */
+
+#define SIZE 31
+#define ALIGN 64
+
+void
+test (int * __restrict__ a,
+      int * __restrict__ b,
+      int * __restrict__ c)
+{
+  int i;
+  /* From here on treat a, b and c as ALIGN-byte aligned.  */
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+  /* Element-wise add over SIZE ints: the loop the dump scans look at.  */
+  for (i = 0; i < SIZE; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-9.c b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-9.c
new file mode 100644
index 0000000..61c0f80
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-9.c
@@ -0,0 +1,8 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */
+
+#include "vect-tail-combine-9.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=64\\)" "vect" { target { i?86-*-* x86_64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
new file mode 100644
index 0000000..e3c40f7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-1.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-2.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-2.c
new file mode 100644
index 0000000..cea2c1d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-2.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-2.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-3.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-3.c
new file mode 100644
index 0000000..18bbbc4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-3.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-3.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-4.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-4.c
new file mode 100644
index 0000000..beb9e0f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-4.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-4.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-5.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-5.c
new file mode 100644
index 0000000..329652f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-5.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-5.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-6.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-6.c
new file mode 100644
index 0000000..3e9a405
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-6.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-6.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-7.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-7.c
new file mode 100644
index 0000000..a229414
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-7.c
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-7.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 04ca176..8b54710 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1596,6 +1596,36 @@  proc check_avx_hw_available { } {
     }]
 }
 
+# Return 1 if the target supports executing AVX2 instructions, 0
+# otherwise.  Cache the result.
+
+proc check_avx2_hw_available { } {
+    return [check_cached_effective_target avx2_hw_available {
+	# If this is not the right target then we can skip the test.
+	if { !([istarget x86_64-*-*] || [istarget i?86-*-*]) } {
+	    expr 0
+	} else {
+	    check_runtime_nocache avx2_hw_available {
+		#include <stddef.h>
+		#include "cpuid.h"
+		int main ()
+		{
+		  unsigned int eax, ebx, ecx, edx;
+		  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+		      || ((ecx & bit_OSXSAVE) != bit_OSXSAVE))
+		    return 1;
+		  /* Leaf 7 is queried below, so it must exist.  NULL needs <stddef.h>.  */
+		  if (__get_cpuid_max (0, NULL) < 7)
+		    return 1;
+		  __cpuid_count (7, 0, eax, ebx, ecx, edx);
+		  /* Exit status is 0 only when the AVX2 bit is set.  */
+		  return (ebx & bit_AVX2) != bit_AVX2;
+		}
+	    } ""
+	}
+    }]
+}
+
 # Return 1 if the target supports running SSE executables, 0 otherwise.
 
 proc check_effective_target_sse_runtime { } {
@@ -1629,6 +1659,17 @@  proc check_effective_target_avx_runtime { } {
     return 0
 }
 
+# Return 1 if the target supports running AVX2 executables, 0 otherwise.
+
+proc check_effective_target_avx2_runtime { } {
+    # 1 only if all three checks pass; stops at the first that fails.
+    return [expr {
+	[check_effective_target_avx2]
+	&& [check_avx2_hw_available]
+	&& [check_avx_os_support_available]
+    }]
+}
+
 # Return 1 if we are compiling for 64-bit PowerPC but we do not use direct
 # move instructions for moves from GPR to FPR.