===================================================================
@@ -6060,11 +6060,6 @@ get_vectype_for_scalar_type_and_size (tr
&& GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
return NULL_TREE;
- /* We can't build a vector type of elements with alignment bigger than
- their size. */
- if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
- return NULL_TREE;
-
/* For vector types of elements whose mode precision doesn't
match their types precision we use a element type of mode
precision. The vectorization routines will have to make sure
@@ -6086,6 +6081,11 @@ get_vectype_for_scalar_type_and_size (tr
&& !POINTER_TYPE_P (scalar_type))
scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
+ /* We can't build a vector type of elements with alignment bigger than
+ their size. Use the type corresponding to the element mode instead. */
+ if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
+ scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
+
/* If no size was supplied use the mode the target prefers. Otherwise
lookup a vector mode of the specified size. */
if (size == 0)
===================================================================
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+
+typedef unsigned long long uint64_t; /* Not referenced by the visible part of this test. */
+
+#define n 4096
+double A[n][n] __attribute__((aligned(16))); /* n x n matrices; each array object is 16-byte aligned. */
+double B[n][n] __attribute__((aligned(16)));
+double C[n][n] __attribute__((aligned(16))); /* Accumulated into by foo. */
+
+#define tilesize 128
+
+typedef double adouble __attribute__((__aligned__(16))); /* double whose type alignment is raised to 16; presumably larger than its size on the targets of interest. */
+
+void foo () /* Tiled loop nest that accumulates Ap[kl] * B[kh+kl][jh+jl] into C[ih+il][jh+jl]. */
+{
+ int ih, jh, kh, il, kl, jl;
+ for (ih = 0; ih < n; ih += tilesize) /* ih, jh and kh advance in steps of tilesize over 0..n. */
+ for (jh = 0; jh < n; jh += tilesize)
+ for (kh = 0; kh < n; kh += tilesize)
+ for (il = 0; il < tilesize; ++il) /* il, kl and jl index within the current tile. */
+ {
+ adouble *Ap = (adouble *)&A[ih+il][kh]; /* Access row ih+il of A, starting at column kh, through the over-aligned element type. */
+ for (kl = 0; kl < tilesize; ++kl)
+ for (jl = 0; jl < tilesize; ++jl)
+ C[ih+il][jh+jl] += Ap[kl] * B[kh+kl][jh+jl]; /* Ap[kl] addresses the same storage as A[ih+il][kh+kl]. */
+ }
+}