diff mbox

Support reduction chain and SLP reduction at the same time

Message ID alpine.LSU.2.20.1706291240360.23185@zhemvz.fhfr.qr
State New
Headers show

Commit Message

Richard Biener June 29, 2017, 10:43 a.m. UTC
I noticed vect_analyze_slp didn't try SLP reduction when it detected
any reduction chain.  That's because the LOOP_VINFO_REDUCTIONS array
also contains the detected chains -- but a reduction chain can only
be vectorized as reduction chain (well, I'm going to fix that!  I
just ran into this code in this process).

The following rectifies this by properly not putting reduction chains
onto LOOP_VINFO_REDUCTIONS, simplifying vect_analyze_slp thereby.

The testcase is now vectorized with full SLP, for v4si that's an
unroll factor of only 2 compared to previously where we used
interleaving with unroll factor 4 and two reductions to process
the remaining SLP reduction (and used SLP for the reduction chain).

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2017-06-29  Richard Biener  <rguenther@suse.de>

	* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Do not add
	reduction chains to LOOP_VINFO_REDUCTIONS.
	* tree-vect-slp.c (vect_analyze_slp): Continue looking for
	SLP reductions after processing reduction chains.

	* gcc.dg/vect/slp-reduc-8.c: New testcase.
diff mbox

Patch

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 249729)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -890,8 +895,10 @@  vect_analyze_scalar_cycles_1 (loop_vec_i
                   STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                            vect_reduction_def;
                   /* Store the reduction cycles for possible vectorization in
-                     loop-aware SLP.  */
-                  LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
+                     loop-aware SLP if it was not detected as reduction
+		     chain.  */
+		  if (! GROUP_FIRST_ELEMENT (vinfo_for_stmt (reduc_stmt)))
+		    LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
                 }
             }
         }
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 249729)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -2102,15 +2103,13 @@  vect_analyze_slp (vec_info *vinfo, unsig
 {
   unsigned int i;
   gimple *first_element;
-  bool ok = false;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
 
   /* Find SLP sequences starting from groups of grouped stores.  */
   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
-    if (vect_analyze_slp_instance (vinfo, first_element, max_tree_size))
-      ok = true;
+    vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
 
   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
     {
@@ -2118,22 +2117,15 @@  vect_analyze_slp (vec_info *vinfo, unsig
 	{
 	  /* Find SLP sequences starting from reduction chains.  */
 	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
-	      if (vect_analyze_slp_instance (vinfo, first_element,
+	    if (! vect_analyze_slp_instance (vinfo, first_element,
 					     max_tree_size))
-		ok = true;
-	      else
-		return false;
-
-	  /* Don't try to vectorize SLP reductions if reduction chain was
-	     detected.  */
-	  return ok;
+	      return false;
 	}
 
       /* Find SLP sequences starting from groups of reductions.  */
-      if (loop_vinfo->reductions.length () > 1
-	  && vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
-					max_tree_size))
-	ok = true;
+      if (loop_vinfo->reductions.length () > 1)
+	vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
+				   max_tree_size);
     }
 
   return true;
Index: gcc/testsuite/gcc.dg/vect/slp-reduc-8.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/slp-reduc-8.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/slp-reduc-8.c	(working copy)
@@ -0,0 +1,48 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+static int a[512], b[512];
+
+void __attribute__((noinline,noclone))
+foo (int *sum1p, int *sum2p, int *sum3p)
+{
+  int sum1 = 0;
+  int sum2 = 0;
+  int sum3 = 0;
+  /* Check that we vectorize a reduction chain and a SLP reduction
+     at the same time.  */
+  for (int i = 0; i < 256; ++i)
+    {
+      sum1 += a[2*i];
+      sum1 += a[2*i + 1];
+      sum2 += b[2*i];
+      sum3 += b[2*i + 1];
+    }
+  *sum1p = sum1;
+  *sum2p = sum2;
+  *sum3p = sum3;
+}
+
+int main()
+{
+  check_vect ();
+
+  for (int i = 0; i < 256; ++i)
+    {
+      a[2*i] = i;
+      a[2*i + 1] = i/2;
+      b[2*i] = i + 1;
+      b[2*i + 1] = i/2 + 1;
+      __asm__ volatile ("" : : : "memory");
+    }
+  int sum1, sum2, sum3;
+  foo (&sum1, &sum2, &sum3);
+  if (sum1 != 48896 || sum2 != 32896 || sum3 != 16512)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */