diff mbox series

Fix PR84278

Message ID alpine.LSU.2.20.1802081042580.18265@zhemvz.fhfr.qr
State New
Headers show
Series Fix PR84278 | expand

Commit Message

Richard Biener Feb. 8, 2018, 9:47 a.m. UTC
Noticed while (still...) working on PR84038.  The vectorizer happily
tries to construct a V4SFmode vector from two V2SFmode vectors because
there's an optab handler for it.  But it failed to check whether
V2SFmode itself is supported by the target, and RTL expansion later
uses TYPE_MODE to get at the element mode, which ends up as BLKmode,
and thus we go through the stack...

So this makes the vectorizer test targetm.vector_mode_supported_p
as well before making use of such types.  In the above case the
vectorizer then resorts to using two DImode scalars instead.
I've verified that's still faster than doing four SFmode scalar
loads, despite whatever reformatting penalty might occur.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

For PR84038 this makes a difference when compiling with
-mprefer-avx128 -fno-vect-cost-model.

Richard.

2018-02-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/84278
	* tree-vect-stmts.c (vectorizable_store): When looking for
	smaller vector types to perform grouped strided loads/stores
	make sure the mode is supported by the target.
	(vectorizable_load): Likewise.

	* gcc.target/i386/pr84278.c: New testcase.
diff mbox series

Patch

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 257477)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -6510,6 +6558,7 @@  vectorizable_store (gimple *stmt, gimple
 	      machine_mode vmode;
 	      if (!mode_for_vector (elmode, group_size).exists (&vmode)
 		  || !VECTOR_MODE_P (vmode)
+		  || !targetm.vector_mode_supported_p (vmode)
 		  || (convert_optab_handler (vec_extract_optab,
 					     TYPE_MODE (vectype), vmode)
 		      == CODE_FOR_nothing))
@@ -6528,6 +6577,7 @@  vectorizable_store (gimple *stmt, gimple
 		     element size stores.  */
 		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
 		      && VECTOR_MODE_P (vmode)
+		      && targetm.vector_mode_supported_p (vmode)
 		      && (convert_optab_handler (vec_extract_optab,
 						 vmode, elmode)
 			  != CODE_FOR_nothing))
@@ -7573,6 +7633,7 @@  vectorizable_load (gimple *stmt, gimple_
 	      machine_mode vmode;
 	      if (mode_for_vector (elmode, group_size).exists (&vmode)
 		  && VECTOR_MODE_P (vmode)
+		  && targetm.vector_mode_supported_p (vmode)
 		  && (convert_optab_handler (vec_init_optab,
 					     TYPE_MODE (vectype), vmode)
 		      != CODE_FOR_nothing))
@@ -7598,6 +7659,7 @@  vectorizable_load (gimple *stmt, gimple_
 		     element loads of the original vector type.  */
 		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
 		      && VECTOR_MODE_P (vmode)
+		      && targetm.vector_mode_supported_p (vmode)
 		      && (convert_optab_handler (vec_init_optab, vmode, elmode)
 			  != CODE_FOR_nothing))
 		    {
Index: gcc/testsuite/gcc.target/i386/pr84278.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr84278.c	(nonexistent)
+++ gcc/testsuite/gcc.target/i386/pr84278.c	(working copy)
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2" } */
+
+float A[1024];
+float B[1024];
+int s;
+
+void foo(void)
+{
+  int i;
+  for (i = 0; i < 128; i++)
+    {
+      B[i*2+0] = A[i*s+0];
+      B[i*2+1] = A[i*s+1];
+    }
+}
+
+/* { dg-final { scan-assembler-not "\(%.sp\)" } } */