diff mbox series

Fix PR65930

Message ID nycvar.YFH.7.76.1910291557481.5566@zhemvz.fhfr.qr
State New
Headers show
Series Fix PR65930 | expand

Commit Message

Richard Biener Oct. 29, 2019, 2:59 p.m. UTC
The following fixes the last remaining testcase from the PR (actually the
first one), which shows missed reduction vectorization with sign changes
in place.  We already vectorize that testcase now, but we fail to use
a SLP reduction group, which pessimizes code generation.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2019-10-29  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/65930
	* tree-vect-loop.c (vect_is_simple_reduction): For reduction
	chains also allow a leading and trailing conversion.
	* tree-vect-slp.c (vect_get_and_check_slp_defs): Handle
	intermediate reduction chains.
	(vect_analyze_slp_instance): Likewise.  Build a SLP
	node for a trailing conversion manually.

	* gcc.dg/vect/pr65930-2.c: New testcase.
diff mbox series

Patch

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 277573)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -3005,14 +3005,21 @@  vect_is_simple_reduction (loop_vec_info
       for (i = path.length () - 1; i >= 1; --i)
 	{
 	  gimple *stmt = USE_STMT (path[i].second);
-	  if (gimple_assign_rhs_code (stmt) != code
+	  stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt);
+	  STMT_VINFO_REDUC_IDX (stmt_info)
+	    = path[i].second->use - gimple_assign_rhs1_ptr (stmt);
+	  enum tree_code stmt_code = gimple_assign_rhs_code (stmt);
+	  bool leading_conversion = (CONVERT_EXPR_CODE_P (stmt_code)
+				     && (i == 1 || i == path.length () - 1));
+	  if ((stmt_code != code && !leading_conversion)
 	      /* We can only handle the final value in epilogue
 		 generation for reduction chains.  */
 	      || (i != 1 && !has_single_use (gimple_assign_lhs (stmt))))
 	    is_slp_reduc = false;
-	  stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt);
-	  STMT_VINFO_REDUC_IDX (stmt_info)
-	    = path[i].second->use - gimple_assign_rhs1_ptr (stmt);
+	  /* For reduction chains we support trailing/leading
+	     conversions.  We do not store those in the actual chain.  */
+	  if (leading_conversion)
+	    continue;
 	  reduc_chain.safe_push (stmt_info);
 	}
       if (is_slp_reduc && reduc_chain.length () > 1)
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 277573)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -419,6 +419,13 @@  again:
 
       if (first)
 	{
+	  /* For the swapping logic below force vect_reduction_def
+	     for the reduction op in a SLP reduction group.  */
+	  if (!STMT_VINFO_DATA_REF (stmt_info)
+	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+	      && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
+	      && def_stmt_info)
+	    dt = vect_reduction_def;
 	  oprnd_info->first_dt = dt;
 	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
 	}
@@ -2041,7 +2048,8 @@  vect_analyze_slp_instance (vec_info *vin
       /* Mark the first element of the reduction chain as reduction to properly
 	 transform the node.  In the reduction analysis phase only the last
 	 element of the chain is marked as reduction.  */
-      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
+      STMT_VINFO_DEF_TYPE (stmt_info)
+	= STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
       STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
 	= STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
     }
@@ -2071,6 +2079,34 @@  vect_analyze_slp_instance (vec_info *vin
   delete bst_map;
   if (node != NULL)
     {
+      /* If this is a reduction chain with a conversion in front
+         amend the SLP tree with a node for that.  */
+      if (!dr
+	  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
+	{
+	  /* Get at the conversion stmt - we know it's the single use
+	     of the last stmt of the reduction chain.  */
+	  gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
+	  use_operand_p use_p;
+	  gimple *use_stmt;
+	  bool r = single_imm_use (gimple_assign_lhs (tem), &use_p, &use_stmt);
+	  gcc_assert (r);
+	  next_info = vinfo->lookup_stmt (use_stmt);
+	  next_info = vect_stmt_to_vectorize (next_info);
+	  scalar_stmts = vNULL;
+	  scalar_stmts.create (group_size);
+	  for (unsigned i = 0; i < group_size; ++i)
+	    scalar_stmts.quick_push (next_info);
+	  slp_tree conv = vect_create_new_slp_node (scalar_stmts);
+	  SLP_TREE_CHILDREN (conv).quick_push (node);
+	  node = conv;
+	  /* We also have to fake this conversion stmt as SLP reduction group
+	     so we don't have to mess with too much code elsewhere.  */
+	  REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
+	  REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
+	}
+
       /* Calculate the unrolling factor based on the smallest type.  */
       poly_uint64 unrolling_factor
 	= calculate_unrolling_factor (max_nunits, group_size);
Index: gcc/testsuite/gcc.dg/vect/pr65930-2.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr65930-2.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr65930-2.c	(working copy)
@@ -0,0 +1,28 @@ 
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+int __attribute__((noipa))
+bar (unsigned int *x, int n)
+{
+  int sum = 4;
+  x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__);
+  for (int i = 0; i < n; ++i)
+    sum += x[i*4+0]+ x[i*4 + 1] + x[i*4 + 2] + x[i*4 + 3];
+  return sum;
+}
+
+int
+main ()
+{
+  static int a[16] __attribute__((aligned(__BIGGEST_ALIGNMENT__)))
+    = { 1, 3, 5, 8, 9, 10, 17, 18, 23, 29, 30, 55, 42, 2, 3, 1 };
+  check_vect ();
+  if (bar (a, 4) != 260)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */