diff mbox

Fix SLP PR58135.

Message ID CY1PR1201MB109884E1A7DE65BED803FAF18F750@CY1PR1201MB1098.namprd12.prod.outlook.com
State New
Headers show

Commit Message

Kumar, Venkataramanan May 14, 2016, 6:04 p.m. UTC
Hi Richard, 

As per your suggestion, I tried to fix the PR by splitting the SLP store group at a vector boundary after the SLP tree is built.

Bootstrap passed on x86_64.
Checked the patch with check_GNU_style.sh.

The gfortran.dg/pr46519-1.f test now gets SLP vectorized, and hence generates 2 more vzeroupper instructions.
As recommended, I adjusted the test case by adding -fno-tree-slp-vectorize so that its output is again as expected after loop vectorization.

The following tests are now passing.

------ Snip-----
Tests that now work, but didn't before:

gcc.dg/vect/bb-slp-19.c -flto -ffat-lto-objects  scan-tree-dump-times slp2 "basic block vectorized" 1

gcc.dg/vect/bb-slp-19.c scan-tree-dump-times slp2 "basic block vectorized" 1

New tests that PASS:

gcc.dg/vect/pr58135.c (test for excess errors)
gcc.dg/vect/pr58135.c -flto -ffat-lto-objects (test for excess errors)

------ Snip-----

ChangeLog

2016-05-14  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
     PR tree-optimization/58135
    * tree-vect-slp.c: When the group size is not a multiple of the vector size,
     allow splitting of the store group at a vector boundary.

Test suite  ChangeLog
2016-05-14  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
    * gcc.dg/vect/bb-slp-19.c:  Remove XFAIL. 
    * gcc.dg/vect/pr58135.c:  Add new.
    * gfortran.dg/pr46519-1.f: Adjust test case.

Is the attached patch OK for trunk?

Regards,
Venkat.
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-19.c b/gcc/testsuite/gcc.dg/vect/bb-slp-19.c
index 42cd294..c282155 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-19.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-19.c
@@ -53,5 +53,5 @@  int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2"  { xfail *-*-* }  } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
   
diff --git a/gcc/testsuite/gcc.dg/vect/pr58135.c b/gcc/testsuite/gcc.dg/vect/pr58135.c
new file mode 100644
index 0000000..ca25000
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr58135.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int a[100];
+void foo ()
+{
+  a[0] = a[1] = a[2] = a[3] = a[4]= 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
diff --git a/gcc/testsuite/gfortran.dg/pr46519-1.f b/gcc/testsuite/gfortran.dg/pr46519-1.f
index 51c64b8..46be9f5 100644
--- a/gcc/testsuite/gfortran.dg/pr46519-1.f
+++ b/gcc/testsuite/gfortran.dg/pr46519-1.f
@@ -1,5 +1,5 @@ 
 ! { dg-do compile { target i?86-*-* x86_64-*-* } }
-! { dg-options "-O3 -mavx -mvzeroupper -mtune=generic -dp" }
+! { dg-options "-O3 -mavx -mvzeroupper -fno-tree-slp-vectorize -mtune=generic -dp" }
 
       PROGRAM MG3XDEMO 
       INTEGER LM, NM, NV, NR, NIT
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index d713848..23a127f 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1754,18 +1754,6 @@  vect_analyze_slp_instance (vec_info *vinfo,
     }
   nunits = TYPE_VECTOR_SUBPARTS (vectype);
 
-  /* Calculate the unrolling factor.  */
-  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
-  if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "Build SLP failed: unrolling required in basic"
-			 " block SLP\n");
-
-      return false;
-    }
-
   /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
   scalar_stmts.create (group_size);
   next = stmt;
@@ -1801,126 +1789,151 @@  vect_analyze_slp_instance (vec_info *vinfo,
   /* Build the tree for the SLP instance.  */
   bool *matches = XALLOCAVEC (bool, group_size);
   unsigned npermutes = 0;
-  if ((node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
-				   &max_nunits, &loads, matches, &npermutes,
-				   NULL, max_tree_size)) != NULL)
-    {
-      /* Calculate the unrolling factor based on the smallest type.  */
-      if (max_nunits > nunits)
-        unrolling_factor = least_common_multiple (max_nunits, group_size)
-                           / group_size;
 
-      if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
-        {
-          if (dump_enabled_p ())
-            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "Build SLP failed: unrolling required in basic"
-			     " block SLP\n");
-	  vect_free_slp_tree (node);
-	  loads.release ();
-          return false;
-        }
+  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
+			      &max_nunits, &loads, matches, &npermutes,
+			      NULL, max_tree_size);
 
-      /* Create a new SLP instance.  */
-      new_instance = XNEW (struct _slp_instance);
-      SLP_INSTANCE_TREE (new_instance) = node;
-      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
-      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
-      SLP_INSTANCE_LOADS (new_instance) = loads;
-
-      /* Compute the load permutation.  */
-      slp_tree load_node;
-      bool loads_permuted = false;
-      FOR_EACH_VEC_ELT (loads, i, load_node)
+  if (node != NULL)
+    {
+      /* Calculate the unrolling factor.  */
+      unrolling_factor = least_common_multiple
+			  (nunits, group_size) / group_size;
+
+      if (is_a <bb_vec_info> (vinfo)
+	  && nunits < group_size
+	  && unrolling_factor != 1
+	  && is_a <bb_vec_info> (vinfo))
 	{
-	  vec<unsigned> load_permutation;
-	  int j;
-	  gimple *load, *first_stmt;
-	  bool this_load_permuted = false;
-	  load_permutation.create (group_size);
-	  first_stmt = GROUP_FIRST_ELEMENT
-	      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
-	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
-	    {
-	      int load_place
-		= vect_get_place_in_interleaving_chain (load, first_stmt);
-	      gcc_assert (load_place != -1);
-	      if (load_place != j)
-		this_load_permuted = true;
-	      load_permutation.safe_push (load_place);
-	    }
-	  if (!this_load_permuted
-	      /* The load requires permutation when unrolling exposes
-	         a gap either because the group is larger than the SLP
-		 group-size or because there is a gap between the groups.  */
-	      && (unrolling_factor == 1
-		  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
-		      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
+	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			   "Build SLP failed: store group "
+			   "size not a multiple of the vector size "
+			   "in basic block SLP\n");
+	  /* Fatal mismatch.  */
+	  matches[nunits] = false;
+	}
+      else
+	{
+	  /* Calculate the unrolling factor based on the smallest type.  */
+	  if (max_nunits > nunits)
+	    unrolling_factor
+		= least_common_multiple (max_nunits, group_size)/group_size;
+
+	  if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
 	    {
-	      load_permutation.release ();
-	      continue;
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+				 vect_location,
+				 "Build SLP failed: unrolling "
+				 "required in basic block SLP\n");
+	      vect_free_slp_tree (node);
+	      loads.release ();
+	      return false;
 	    }
-	  SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
-	  loads_permuted = true;
-	}
 
-      if (loads_permuted)
-        {
-          if (!vect_supported_load_permutation_p (new_instance))
-            {
-              if (dump_enabled_p ())
-                {
-                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				   "Build SLP failed: unsupported load "
-				   "permutation ");
-                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
-                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-                }
-              vect_free_slp_instance (new_instance);
-              return false;
-            }
-        }
+	  /* Create a new SLP instance.  */
+	  new_instance = XNEW (struct _slp_instance);
+	  SLP_INSTANCE_TREE (new_instance) = node;
+	  SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
+	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
+	  SLP_INSTANCE_LOADS (new_instance) = loads;
 
-      /* If the loads and stores can be handled with load/store-lane
-	 instructions do not generate this SLP instance.  */
-      if (is_a <loop_vec_info> (vinfo)
-	  && loads_permuted
-	  && dr && vect_store_lanes_supported (vectype, group_size))
-	{
+	  /* Compute the load permutation.  */
 	  slp_tree load_node;
+	  bool loads_permuted = false;
 	  FOR_EACH_VEC_ELT (loads, i, load_node)
 	    {
-	      gimple *first_stmt = GROUP_FIRST_ELEMENT
-		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
-	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
-	      /* Use SLP for strided accesses (or if we can't load-lanes).  */
-	      if (STMT_VINFO_STRIDED_P (stmt_vinfo)
-		  || ! vect_load_lanes_supported
-			(STMT_VINFO_VECTYPE (stmt_vinfo),
-			 GROUP_SIZE (stmt_vinfo)))
-		break;
+	      vec<unsigned> load_permutation;
+	      int j;
+	      gimple *load, *first_stmt;
+	      bool this_load_permuted = false;
+	      load_permutation.create (group_size);
+	      first_stmt = GROUP_FIRST_ELEMENT
+		(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
+	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
+		{
+		  int load_place = vect_get_place_in_interleaving_chain
+				     (load, first_stmt);
+		  gcc_assert (load_place != -1);
+		  if (load_place != j)
+		    this_load_permuted = true;
+		  load_permutation.safe_push (load_place);
+		}
+	      if (!this_load_permuted
+		  /* The load requires permutation when unrolling exposes
+		  a gap either because the group is larger than the SLP
+		  group-size or because there is a gap between the groups.  */
+		  && (unrolling_factor == 1
+		  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
+		      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
+		{
+		  load_permutation.release ();
+		  continue;
+		}
+	      SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
+	      loads_permuted = true;
 	    }
-	  if (i == loads.length ())
+
+	  if (loads_permuted)
 	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "Built SLP cancelled: can use "
-				 "load/store-lanes\n");
-	      vect_free_slp_instance (new_instance);
-	      return false;
+	      if (!vect_supported_load_permutation_p (new_instance))
+		{
+		  if (dump_enabled_p ())
+		    {
+		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				       "Build SLP failed: unsupported load "
+				       "permutation ");
+		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
+					TDF_SLIM, stmt, 0);
+		      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+		    }
+		  vect_free_slp_instance (new_instance);
+		  return false;
+		}
+	    }
+
+	  /* If the loads and stores can be handled with load/store-lane
+	     instructions do not generate this SLP instance.  */
+	  if (is_a <loop_vec_info> (vinfo)
+	      && loads_permuted
+	      && dr && vect_store_lanes_supported (vectype, group_size))
+	    {
+	      slp_tree load_node;
+	      FOR_EACH_VEC_ELT (loads, i, load_node)
+		{
+		  gimple *first_stmt = GROUP_FIRST_ELEMENT
+		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
+		  stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
+		  /* Use SLP for strided accesses (or if we
+		     can't load-lanes).  */
+		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+		    || ! vect_load_lanes_supported
+			  (STMT_VINFO_VECTYPE (stmt_vinfo),
+			   GROUP_SIZE (stmt_vinfo)))
+		    break;
+		}
+	      if (i == loads.length ())
+		{
+		  if (dump_enabled_p ())
+		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				     "Built SLP cancelled: can use "
+				     "load/store-lanes\n");
+		  vect_free_slp_instance (new_instance);
+		  return false;
+		}
 	    }
-	}
 
-      vinfo->slp_instances.safe_push (new_instance);
+	  vinfo->slp_instances.safe_push (new_instance);
 
-      if (dump_enabled_p ())
-	{
-	  dump_printf_loc (MSG_NOTE, vect_location,
-			   "Final SLP tree for instance:\n");
-	  vect_print_slp_tree (MSG_NOTE, vect_location, node);
-	}
+	  if (dump_enabled_p ())
+	    {
+	      dump_printf_loc (MSG_NOTE, vect_location,
+			       "Final SLP tree for instance:\n");
+	      vect_print_slp_tree (MSG_NOTE, vect_location, node);
+	    }
 
-      return true;
+	  return true;
+	}
     }
 
   /* Failed to SLP.  */