diff mbox series

Adjust BB vectorization SLP build heuristics

Message ID nycvar.YFH.7.76.2010161449120.31629@elmra.sevgm.obk
State New
Headers show
Series Adjust BB vectorization SLP build heuristics | expand

Commit Message

Richard Biener Oct. 16, 2020, 12:49 p.m. UTC
This changes SLP def gathering so that it no longer fails on a
mismatched def type but instead demotes the def to external.  This
allows the new testcase to be vectorized in full (with GCC 10 it is
not vectorized at all, and with current trunk we vectorize only the
store).  This is important because, with BB vectorization being
applied to bigger pieces of code, the chance increases that we mix
internal and external defs for an operand that should end up
treated as external (built from scalars).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

2020-10-16  Richard Biener  <rguenther@suse.de>

	* tree-vect-slp.c (vect_get_and_check_slp_defs): For BB
	vectorization swap operands only if it helps, demote mismatches to
	external.

	* gcc.dg/vect/bb-slp-53.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-53.c | 20 +++++++++++++++++
 gcc/tree-vect-slp.c                   | 31 +++++++++++++++++++++------
 2 files changed, 45 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-53.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-53.c b/gcc/testsuite/gcc.dg/vect/bb-slp-53.c
new file mode 100644
index 00000000000..f3b5f317444
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-53.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+double a[2], b[2];
+
+void foo(double x, double y)  /* Stores a two-lane product into a[].  */
+{
+  double breakme1 = y + 3.;  /* Lane 1 multiplier: computed from y.  */
+  double a1 = b[1] + 2.;  /* Lane 1 value derived from the load of b[1].  */
+  double breakme0 = x;  /* Lane 0 multiplier: plain copy of x.  */
+  double a0 = b[0] + 1.;  /* Lane 0 value derived from the load of b[0].  */
+  a[0] = a0 * breakme0;  /* Lane 0 of the grouped store to a[].  */
+  a[1] = a1 * breakme1;  /* Lane 1 of the grouped store to a[].  */
+}
+
+/* We should vectorize the SLP opportunity starting from the
+   grouped store to a[] including the load from b[] at the
+   leaf even though the multiplication requires another
+   vector invariant to be built.  */
+/* { dg-final { scan-tree-dump "transform load" "slp2" } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index cfb79e2651f..c3e6d67067c 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -564,8 +564,15 @@  vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
 		      != (oprnd_info->first_dt != vect_reduction_def))))
 	    {
 	      /* Try swapping operands if we got a mismatch.  For BB
-		 vectorization only in case that will improve things.  */
-	      if (i == commutative_op && !swapped)
+		 vectorization only in case it will clearly improve things.  */
+	      if (i == commutative_op && !swapped
+		  && (!is_a <bb_vec_info> (vinfo)
+		      || (!vect_def_types_match ((*oprnds_info)[i+1]->first_dt,
+						 dts[i+1])
+			  && (vect_def_types_match (oprnd_info->first_dt,
+						    dts[i+1])
+			      || vect_def_types_match
+				   ((*oprnds_info)[i+1]->first_dt, dts[i])))))
 		{
 		  if (dump_enabled_p ())
 		    dump_printf_loc (MSG_NOTE, vect_location,
@@ -579,10 +586,22 @@  vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
 		  continue;
 		}
 
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "Build SLP failed: different types\n");
-	      return 1;
+	      if (is_a <bb_vec_info> (vinfo))
+		{
+		  /* Now for commutative ops we should see whether we can
+		     make the other operand matching.  */
+		  if (dump_enabled_p ())
+		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				     "treating operand as external\n");
+		  oprnd_info->first_dt = dt = vect_external_def;
+		}
+	      else
+		{
+		  if (dump_enabled_p ())
+		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				     "Build SLP failed: different types\n");
+		  return 1;
+		}
 	    }
 
       /* Make sure to demote the overall operand to external.  */