===================================================================
@@ -4,13 +4,16 @@
#include <string.h>
#include "tree-vect.h"
+/* AVX512BW has V64QImode; use 64 elements so that char_1 is vectorizable with that mode. */
+#define MAX_VEC_ELEMENTS 64
+
#define FOO(T,N) \
void __attribute__((noinline,noclone)) \
foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \
{ \
T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \
T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \
- for (int i = 0; i < 16; i++) \
+ for (int i = 0; i < MAX_VEC_ELEMENTS; i++) \
{ \
for (int j = 0; j < N; ++j) \
out[j] = in[j]; \
@@ -23,9 +26,9 @@ foo_ ## T ## _ ## N (T * __restrict__ in
do { \
memset (out, 0, 4096); \
foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \
- if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \
+ if (memcmp (in, out, sizeof (T) * MAX_VEC_ELEMENTS * N) != 0) \
__builtin_abort (); \
- for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \
+ for (int i = sizeof (T) * MAX_VEC_ELEMENTS * N; i < 4096; ++i) \
if (out[i] != 0) \
__builtin_abort (); \
} while (0)
===================================================================
@@ -4,13 +4,16 @@
#include <string.h>
#include "tree-vect.h"
+/* AVX512BW has V64QImode; use 64 elements so that char_1 is vectorizable with that mode. */
+#define MAX_VEC_ELEMENTS 64
+
#define FOO(T,N) \
void __attribute__((noinline,noclone)) \
foo_ ## T ## _ ## N (T * __restrict__ in_, T * __restrict__ out_, int s) \
{ \
T *in = __builtin_assume_aligned (in_, __BIGGEST_ALIGNMENT__); \
T *out = __builtin_assume_aligned (out_, __BIGGEST_ALIGNMENT__); \
- for (int i = 0; i < 16; i++) \
+ for (int i = 0; i < MAX_VEC_ELEMENTS; i++) \
{ \
for (int j = 0; j < N; ++j) \
out[j] = in[j]; \
@@ -23,9 +26,9 @@ foo_ ## T ## _ ## N (T * __restrict__ in
do { \
memset (out, 0, 4096); \
foo_ ## T ## _ ## N ((T *)in, (T *)out, 1); \
- if (memcmp (in, out, sizeof (T) * 16 * N) != 0) \
+ if (memcmp (in, out, sizeof (T) * MAX_VEC_ELEMENTS * N) != 0) \
__builtin_abort (); \
- for (int i = sizeof (T) * 16 * N; i < 4096; ++i) \
+ for (int i = sizeof (T) * MAX_VEC_ELEMENTS * N; i < 4096; ++i) \
if (out[i] != 0) \
__builtin_abort (); \
} while (0)