diff mbox

Fix PR54894

Message ID alpine.LNX.2.00.1210120953050.4063@zhemvz.fhfr.qr
State New
Headers show

Commit Message

Richard Biener Oct. 12, 2012, 7:53 a.m. UTC
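This fixes PR54894: get_vectype_for_scalar_type_and_size no longer returns NULL_TREE for scalar types whose alignment exceeds their size; it now substitutes the type for the scalar's mode as the vector element type.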

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2012-10-12  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/54894
	* tree-vect-stmts.c (get_vectype_for_scalar_type_and_size):
	Handle over-aligned scalar types properly.

	* gcc.dg/torture/pr54894.c: New testcase.
diff mbox

Patch

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 192359)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -6060,11 +6060,6 @@  get_vectype_for_scalar_type_and_size (tr
       && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
     return NULL_TREE;
 
-  /* We can't build a vector type of elements with alignment bigger than
-     their size.  */
-  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
-    return NULL_TREE;
-
   /* For vector types of elements whose mode precision doesn't
      match their types precision we use a element type of mode
      precision.  The vectorization routines will have to make sure
@@ -6086,6 +6081,11 @@  get_vectype_for_scalar_type_and_size (tr
       && !POINTER_TYPE_P (scalar_type))
     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
 
+  /* We can't build a vector type of elements with alignment bigger than
+     their size.  */
+  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
+    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
+
   /* If no size was supplied use the mode the target prefers.   Otherwise
      lookup a vector mode of the specified size.  */
   if (size == 0)
Index: gcc/testsuite/gcc.dg/torture/pr54894.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr54894.c	(revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr54894.c	(working copy)
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+
+typedef unsigned long long uint64_t;
+
+#define n 4096
+double A[n][n] __attribute__((aligned(16)));
+double B[n][n] __attribute__((aligned(16)));
+double C[n][n] __attribute__((aligned(16)));
+
+#define tilesize 128
+
+typedef double adouble __attribute__((__aligned__(16)));
+
+void foo ()  /* Tiled update C += A * B over the global n x n arrays.  */
+{
+  int ih, jh, kh, il, kl, jl;  /* *h: tile origins, *l: offsets within a tile.  */
+  for (ih = 0; ih < n; ih += tilesize) /* Tiles over the rows of A and C.  */
+    for (jh = 0; jh < n; jh += tilesize)                 /* Tiles over the columns of B and C.  */
+      for (kh = 0; kh < n; kh += tilesize)                      /* Tiles over the shared dimension.  */
+	for (il = 0; il < tilesize; ++il)
+	  {
+	    adouble *Ap = (adouble *)&A[ih+il][kh];  /* Row of A viewed through the 16-byte-aligned adouble type.  */
+	    for (kl = 0; kl < tilesize; ++kl)
+	      for (jl = 0; jl < tilesize; ++jl)
+		C[ih+il][jh+jl] += Ap[kl] * B[kh+kl][jh+jl];  /* Ap[kl] is A[ih+il][kh+kl].  */
+	  }
+}