More PR84037 fixing

Message ID alpine.LSU.2.20.1802121313330.18265@zhemvz.fhfr.qr
State New
Headers show
Series
  • More PR84037 fixing
Related show

Commit Message

Richard Biener Feb. 12, 2018, 12:16 p.m.
The following fixes two issues I found while investigating the costing
of vectorization for capacita.  First, we fail to CSE between
SLP instances, which is easy to fix.  Second, we are double-counting
hybrid SLP stmts.  Fixing both makes SLP vectorization profitable
for AVX256 (but still not for AVX128).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.  Checked
SPEC 2k6 for build and test (with and without LTO).

Richard.

2018-02-12  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/84037
	* tree-vect-slp.c (vect_analyze_slp_cost): Add visited
	parameter, move visited init to caller.
	(vect_slp_analyze_operations): Separate cost from validity
	check, initialize visited once for all instances.
	(vect_schedule_slp): Allocate map to CSE vectorized nodes once
	for all instances.
	* tree-vect-stmts.c (vect_model_simple_cost): Make early
	out an assert.
	(vect_model_promotion_demotion_cost): Likewise.
	(vectorizable_bswap): Guard cost modeling with !slp_node
	instead of !PURE_SLP_STMT to avoid double-counting on hybrid
	SLP stmts.
	(vectorizable_call): Likewise.
	(vectorizable_conversion): Likewise.
	(vectorizable_assignment): Likewise.
	(vectorizable_shift): Likewise.
	(vectorizable_operation): Likewise.
	(vectorizable_store): Likewise.
	(vectorizable_load): Likewise.
	(vectorizable_condition): Likewise.
	(vectorizable_comparison): Likewise.

Patch

Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 257581)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -2003,17 +2003,13 @@  vect_analyze_slp_cost_1 (slp_instance in
 /* Compute the cost for the SLP instance INSTANCE.  */
 
 static void
-vect_analyze_slp_cost (slp_instance instance, void *data)
+vect_analyze_slp_cost (slp_instance instance, void *data, scalar_stmts_set_t *visited)
 {
   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
   unsigned ncopies_for_cost;
   stmt_info_for_cost *si;
   unsigned i;
 
-  if (dump_enabled_p ())
-    dump_printf_loc (MSG_NOTE, vect_location,
-		     "=== vect_analyze_slp_cost ===\n");
-
   /* Calculate the number of vector stmts to create based on the unrolling
      factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
      GROUP_SIZE / NUNITS otherwise.  */
@@ -2050,11 +2046,9 @@  vect_analyze_slp_cost (slp_instance inst
 
   prologue_cost_vec.create (10);
   body_cost_vec.create (10);
-  scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
   vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
 			   &prologue_cost_vec, &body_cost_vec,
 			   ncopies_for_cost, visited);
-  delete visited;
 
   /* Record the prologue costs, which were delayed until we were
      sure that SLP was successful.  */
@@ -2871,13 +2865,19 @@  vect_slp_analyze_operations (vec_info *v
           vinfo->slp_instances.ordered_remove (i);
 	}
       else
-	{
-	  /* Compute the costs of the SLP instance.  */
-	  vect_analyze_slp_cost (instance, vinfo->target_cost_data);
-	  i++;
-	}
+	i++;
     }
 
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "=== vect_analyze_slp_cost ===\n");
+
+  /* Compute the costs of the SLP instances.  */
+  scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
+  for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
+    vect_analyze_slp_cost (instance, vinfo->target_cost_data, visited);
+  delete visited;
+
   return !vinfo->slp_instances.is_empty ();
 }
 
@@ -4246,19 +4246,20 @@  vect_schedule_slp (vec_info *vinfo)
   unsigned int i;
   bool is_store = false;
 
+
+  scalar_stmts_to_slp_tree_map_t *bst_map
+    = new scalar_stmts_to_slp_tree_map_t ();
   slp_instances = vinfo->slp_instances;
   FOR_EACH_VEC_ELT (slp_instances, i, instance)
     {
       /* Schedule the tree of INSTANCE.  */
-      scalar_stmts_to_slp_tree_map_t *bst_map
-	= new scalar_stmts_to_slp_tree_map_t ();
       is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                              instance, bst_map);
-      delete bst_map;
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location,
                          "vectorizing stmts using SLP.\n");
     }
+  delete bst_map;
 
   FOR_EACH_VEC_ELT (slp_instances, i, instance)
     {
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 257581)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -826,8 +826,7 @@  vect_model_simple_cost (stmt_vec_info st
   int inside_cost = 0, prologue_cost = 0;
 
   /* The SLP costs were already calculated during SLP tree build.  */
-  if (PURE_SLP_STMT (stmt_info))
-    return;
+  gcc_assert (!PURE_SLP_STMT (stmt_info));
 
   /* Cost the "broadcast" of a scalar operand in to a vector operand.
      Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
@@ -864,8 +863,7 @@  vect_model_promotion_demotion_cost (stmt
   void *target_cost_data;
 
   /* The SLP costs were already calculated during SLP tree build.  */
-  if (PURE_SLP_STMT (stmt_info))
-    return;
+  gcc_assert (!PURE_SLP_STMT (stmt_info));
 
   if (loop_vinfo)
     target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
@@ -2891,7 +2889,7 @@  vectorizable_bswap (gimple *stmt, gimple
       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
                          "\n");
-      if (! PURE_SLP_STMT (stmt_info))
+      if (! slp_node)
 	{
 	  add_stmt_cost (stmt_info->vinfo->target_cost_data,
 			 1, vector_stmt, stmt_info, 0, vect_prologue);
@@ -3210,10 +3208,13 @@  vectorizable_call (gimple *gs, gimple_st
       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                          "\n");
-      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
-      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
-	add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
-		       vec_promote_demote, stmt_info, 0, vect_body);
+      if (!slp_node)
+	{
+	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
+	  if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
+	    add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
+			   vec_promote_demote, stmt_info, 0, vect_body);
+	}
 
       return true;
     }
@@ -4742,17 +4743,20 @@  vectorizable_conversion (gimple *stmt, g
       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
         {
 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
-	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
+	  if (!slp_node)
+	    vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
 	}
       else if (modifier == NARROW)
 	{
 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
-	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
+	  if (!slp_node)
+	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
 	}
       else
 	{
 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
-	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
+	  if (!slp_node)
+	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
 	}
       interm_types.release ();
       return true;
@@ -5149,7 +5153,8 @@  vectorizable_assignment (gimple *stmt, g
       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location,
                          "=== vectorizable_assignment ===\n");
-      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
+      if (!slp_node)
+	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
       return true;
     }
 
@@ -5513,7 +5518,8 @@  vectorizable_shift (gimple *stmt, gimple
       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location,
                          "=== vectorizable_shift ===\n");
-      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
+      if (!slp_node)
+	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
       return true;
     }
 
@@ -5836,7 +5842,8 @@  vectorizable_operation (gimple *stmt, gi
       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location,
                          "=== vectorizable_operation ===\n");
-      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
+      if (!slp_node)
+	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
       return true;
     }
 
@@ -6240,7 +6247,7 @@  vectorizable_store (gimple *stmt, gimple
 
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
       /* The SLP costs are calculated during SLP analysis.  */
-      if (!PURE_SLP_STMT (stmt_info))
+      if (!slp_node)
 	vect_model_store_cost (stmt_info, ncopies, memory_access_type,
 			       vls_type, NULL, NULL, NULL);
       return true;
@@ -7451,7 +7458,7 @@  vectorizable_load (gimple *stmt, gimple_
 
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
       /* The SLP costs are calculated during SLP analysis.  */
-      if (!PURE_SLP_STMT (stmt_info))
+      if (! slp_node)
 	vect_model_load_cost (stmt_info, ncopies, memory_access_type,
 			      NULL, NULL, NULL);
       return true;
@@ -8673,7 +8680,8 @@  vectorizable_condition (gimple *stmt, gi
       if (expand_vec_cond_expr_p (vectype, comp_vectype,
 				     cond_code))
 	{
-	  vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
+	  if (!slp_node)
+	    vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
 	  return true;
 	}
       return false;
@@ -9037,8 +9045,9 @@  vectorizable_comparison (gimple *stmt, g
   if (!vec_stmt)
     {
       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
-      vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
-			      dts, ndts, NULL, NULL);
+      if (!slp_node)
+	vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
+				dts, ndts, NULL, NULL);
       if (bitop1 == NOP_EXPR)
 	return expand_vec_cmp_expr_p (vectype, mask_type, code);
       else