diff mbox series

[06/10] vect: Adjust vectorizable_store costing on VMAT_LOAD_STORE_LANES

Message ID 048c90cf62145799aa31e3ca4edd6f7adc911a6c.1694657494.git.linkw@linux.ibm.com
State New
Headers show
Series vect: Move costing next to the transform for vect store | expand

Commit Message

Kewen.Lin Sept. 14, 2023, 3:11 a.m. UTC
This patch adjusts the cost handling on VMAT_LOAD_STORE_LANES
in function vectorizable_store.  We don't call function
vect_model_store_cost for it any more.  It's the case of
interleaving stores, so it skips all stmts excepting for
first_stmt_info, consider the whole group when costing
first_stmt_info.  This patch shouldn't have any functional
changes.

gcc/ChangeLog:

	* tree-vect-stmts.cc (vect_model_store_cost): Assert it will never
	get VMAT_LOAD_STORE_LANES.
	(vectorizable_store): Adjust the cost handling on VMAT_LOAD_STORE_LANES
	without calling vect_model_store_cost.  Factor out new lambda function
	update_prologue_cost.
---
 gcc/tree-vect-stmts.cc | 110 ++++++++++++++++++++++++++++-------------
 1 file changed, 75 insertions(+), 35 deletions(-)

Comments

Richard Biener Sept. 27, 2023, 11:27 a.m. UTC | #1
On Thu, Sep 14, 2023 at 5:12 AM Kewen Lin <linkw@linux.ibm.com> wrote:
>
> This patch adjusts the cost handling on VMAT_LOAD_STORE_LANES
> in function vectorizable_store.  We don't call function
> vect_model_store_cost for it any more.  It's the case of
> interleaving stores, so it skips all stmts excepting for
> first_stmt_info, consider the whole group when costing
> first_stmt_info.  This patch shouldn't have any functional
> changes.

OK.

> gcc/ChangeLog:
>
>         * tree-vect-stmts.cc (vect_model_store_cost): Assert it will never
>         get VMAT_LOAD_STORE_LANES.
>         (vectorizable_store): Adjust the cost handling on VMAT_LOAD_STORE_LANES
>         without calling vect_model_store_cost.  Factor out new lambda function
>         update_prologue_cost.
> ---
>  gcc/tree-vect-stmts.cc | 110 ++++++++++++++++++++++++++++-------------
>  1 file changed, 75 insertions(+), 35 deletions(-)
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 3d01168080a..fbd16b8a487 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -966,7 +966,8 @@ vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
>  {
>    gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
>               && memory_access_type != VMAT_ELEMENTWISE
> -             && memory_access_type != VMAT_STRIDED_SLP);
> +             && memory_access_type != VMAT_STRIDED_SLP
> +             && memory_access_type != VMAT_LOAD_STORE_LANES);
>    unsigned int inside_cost = 0, prologue_cost = 0;
>    stmt_vec_info first_stmt_info = stmt_info;
>    bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
> @@ -8408,7 +8409,8 @@ vectorizable_store (vec_info *vinfo,
>        if (grouped_store
>           && !slp
>           && first_stmt_info != stmt_info
> -         && memory_access_type == VMAT_ELEMENTWISE)
> +         && (memory_access_type == VMAT_ELEMENTWISE
> +             || memory_access_type == VMAT_LOAD_STORE_LANES))
>         return true;
>      }
>    gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
> @@ -8479,6 +8481,31 @@ vectorizable_store (vec_info *vinfo,
>      dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
>                      ncopies);
>
> +  /* Check if we need to update prologue cost for invariant,
> +     and update it accordingly if so.  If it's not for
> +     interleaving store, we can just check vls_type; but if
> +     it's for interleaving store, need to check the def_type
> +     of the stored value since the current vls_type is just
> +     for first_stmt_info.  */
> +  auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
> +  {
> +    gcc_assert (costing_p);
> +    if (slp)
> +      return;
> +    if (grouped_store)
> +      {
> +       gcc_assert (store_rhs);
> +       enum vect_def_type cdt;
> +       gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
> +       if (cdt != vect_constant_def && cdt != vect_external_def)
> +         return;
> +      }
> +    else if (vls_type != VLS_STORE_INVARIANT)
> +      return;
> +    *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
> +                                       0, vect_prologue);
> +  };
> +
>    if (memory_access_type == VMAT_ELEMENTWISE
>        || memory_access_type == VMAT_STRIDED_SLP)
>      {
> @@ -8646,14 +8673,8 @@ vectorizable_store (vec_info *vinfo,
>           if (!costing_p)
>             vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
>                                &vec_oprnds);
> -         else if (!slp)
> -           {
> -             enum vect_def_type cdt;
> -             gcc_assert (vect_is_simple_use (op, vinfo, &cdt));
> -             if (cdt == vect_constant_def || cdt == vect_external_def)
> -               prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
> -                                                  stmt_info, 0, vect_prologue);
> -           }
> +         else
> +           update_prologue_cost (&prologue_cost, op);
>           unsigned int group_el = 0;
>           unsigned HOST_WIDE_INT
>             elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
> @@ -8857,13 +8878,7 @@ vectorizable_store (vec_info *vinfo,
>    if (memory_access_type == VMAT_LOAD_STORE_LANES)
>      {
>        gcc_assert (!slp && grouped_store);
> -      if (costing_p)
> -       {
> -         vect_model_store_cost (vinfo, stmt_info, ncopies, memory_access_type,
> -                                alignment_support_scheme, misalignment,
> -                                vls_type, slp_node, cost_vec);
> -         return true;
> -       }
> +      unsigned inside_cost = 0, prologue_cost = 0;
>        for (j = 0; j < ncopies; j++)
>         {
>           gimple *new_stmt;
> @@ -8879,29 +8894,39 @@ vectorizable_store (vec_info *vinfo,
>                      DR_GROUP_SIZE is the exact number of stmts in the
>                      chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE.  */
>                   op = vect_get_store_rhs (next_stmt_info);
> -                 vect_get_vec_defs_for_operand (vinfo, next_stmt_info, ncopies,
> -                                                op, gvec_oprnds[i]);
> -                 vec_oprnd = (*gvec_oprnds[i])[0];
> -                 dr_chain.quick_push (vec_oprnd);
> +                 if (costing_p)
> +                   update_prologue_cost (&prologue_cost, op);
> +                 else
> +                   {
> +                     vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
> +                                                    ncopies, op,
> +                                                    gvec_oprnds[i]);
> +                     vec_oprnd = (*gvec_oprnds[i])[0];
> +                     dr_chain.quick_push (vec_oprnd);
> +                   }
>                   next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
>                 }
> -             if (mask)
> +
> +             if (!costing_p)
>                 {
> -                 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
> -                                                mask, &vec_masks,
> -                                                mask_vectype);
> -                 vec_mask = vec_masks[0];
> -               }
> +                 if (mask)
> +                   {
> +                     vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
> +                                                    mask, &vec_masks,
> +                                                    mask_vectype);
> +                     vec_mask = vec_masks[0];
> +                   }
>
> -             /* We should have catched mismatched types earlier.  */
> -             gcc_assert (
> -               useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
> -             dataref_ptr
> -               = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
> -                                           NULL, offset, &dummy, gsi,
> -                                           &ptr_incr, false, bump);
> +                 /* We should have catched mismatched types earlier.  */
> +                 gcc_assert (
> +                   useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
> +                 dataref_ptr
> +                   = vect_create_data_ref_ptr (vinfo, first_stmt_info,
> +                                               aggr_type, NULL, offset, &dummy,
> +                                               gsi, &ptr_incr, false, bump);
> +               }
>             }
> -         else
> +         else if (!costing_p)
>             {
>               gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
>               /* DR_CHAIN is then used as an input to
> @@ -8917,6 +8942,15 @@ vectorizable_store (vec_info *vinfo,
>                                              stmt_info, bump);
>             }
>
> +         if (costing_p)
> +           {
> +             for (i = 0; i < vec_num; i++)
> +               vect_get_store_cost (vinfo, stmt_info, 1,
> +                                    alignment_support_scheme, misalignment,
> +                                    &inside_cost, cost_vec);
> +             continue;
> +           }
> +
>           /* Get an array into which we can store the individual vectors.  */
>           tree vec_array = create_vector_array (vectype, vec_num);
>
> @@ -9003,6 +9037,12 @@ vectorizable_store (vec_info *vinfo,
>           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
>         }
>
> +      if (costing_p && dump_enabled_p ())
> +       dump_printf_loc (MSG_NOTE, vect_location,
> +                        "vect_model_store_cost: inside_cost = %d, "
> +                        "prologue_cost = %d .\n",
> +                        inside_cost, prologue_cost);
> +
>        return true;
>      }
>
> --
> 2.31.1
>
diff mbox series

Patch

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 3d01168080a..fbd16b8a487 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -966,7 +966,8 @@  vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
 {
   gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
 	      && memory_access_type != VMAT_ELEMENTWISE
-	      && memory_access_type != VMAT_STRIDED_SLP);
+	      && memory_access_type != VMAT_STRIDED_SLP
+	      && memory_access_type != VMAT_LOAD_STORE_LANES);
   unsigned int inside_cost = 0, prologue_cost = 0;
   stmt_vec_info first_stmt_info = stmt_info;
   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
@@ -8408,7 +8409,8 @@  vectorizable_store (vec_info *vinfo,
       if (grouped_store
 	  && !slp
 	  && first_stmt_info != stmt_info
-	  && memory_access_type == VMAT_ELEMENTWISE)
+	  && (memory_access_type == VMAT_ELEMENTWISE
+	      || memory_access_type == VMAT_LOAD_STORE_LANES))
 	return true;
     }
   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
@@ -8479,6 +8481,31 @@  vectorizable_store (vec_info *vinfo,
     dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
 		     ncopies);
 
+  /* Check if we need to update prologue cost for invariant,
+     and update it accordingly if so.  If it's not for
+     interleaving store, we can just check vls_type; but if
+     it's for interleaving store, need to check the def_type
+     of the stored value since the current vls_type is just
+     for first_stmt_info.  */
+  auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
+  {
+    gcc_assert (costing_p);
+    if (slp)
+      return;
+    if (grouped_store)
+      {
+	gcc_assert (store_rhs);
+	enum vect_def_type cdt;
+	gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
+	if (cdt != vect_constant_def && cdt != vect_external_def)
+	  return;
+      }
+    else if (vls_type != VLS_STORE_INVARIANT)
+      return;
+    *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
+					0, vect_prologue);
+  };
+
   if (memory_access_type == VMAT_ELEMENTWISE
       || memory_access_type == VMAT_STRIDED_SLP)
     {
@@ -8646,14 +8673,8 @@  vectorizable_store (vec_info *vinfo,
 	  if (!costing_p)
 	    vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
 			       &vec_oprnds);
-	  else if (!slp)
-	    {
-	      enum vect_def_type cdt;
-	      gcc_assert (vect_is_simple_use (op, vinfo, &cdt));
-	      if (cdt == vect_constant_def || cdt == vect_external_def)
-		prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
-						   stmt_info, 0, vect_prologue);
-	    }
+	  else
+	    update_prologue_cost (&prologue_cost, op);
 	  unsigned int group_el = 0;
 	  unsigned HOST_WIDE_INT
 	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
@@ -8857,13 +8878,7 @@  vectorizable_store (vec_info *vinfo,
   if (memory_access_type == VMAT_LOAD_STORE_LANES)
     {
       gcc_assert (!slp && grouped_store);
-      if (costing_p)
-	{
-	  vect_model_store_cost (vinfo, stmt_info, ncopies, memory_access_type,
-				 alignment_support_scheme, misalignment,
-				 vls_type, slp_node, cost_vec);
-	  return true;
-	}
+      unsigned inside_cost = 0, prologue_cost = 0;
       for (j = 0; j < ncopies; j++)
 	{
 	  gimple *new_stmt;
@@ -8879,29 +8894,39 @@  vectorizable_store (vec_info *vinfo,
 		     DR_GROUP_SIZE is the exact number of stmts in the
 		     chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE.  */
 		  op = vect_get_store_rhs (next_stmt_info);
-		  vect_get_vec_defs_for_operand (vinfo, next_stmt_info, ncopies,
-						 op, gvec_oprnds[i]);
-		  vec_oprnd = (*gvec_oprnds[i])[0];
-		  dr_chain.quick_push (vec_oprnd);
+		  if (costing_p)
+		    update_prologue_cost (&prologue_cost, op);
+		  else
+		    {
+		      vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
+						     ncopies, op,
+						     gvec_oprnds[i]);
+		      vec_oprnd = (*gvec_oprnds[i])[0];
+		      dr_chain.quick_push (vec_oprnd);
+		    }
 		  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
 		}
-	      if (mask)
+
+	      if (!costing_p)
 		{
-		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
-						 mask, &vec_masks,
-						 mask_vectype);
-		  vec_mask = vec_masks[0];
-		}
+		  if (mask)
+		    {
+		      vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
+						     mask, &vec_masks,
+						     mask_vectype);
+		      vec_mask = vec_masks[0];
+		    }
 
-	      /* We should have catched mismatched types earlier.  */
-	      gcc_assert (
-		useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
-	      dataref_ptr
-		= vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
-					    NULL, offset, &dummy, gsi,
-					    &ptr_incr, false, bump);
+		  /* We should have catched mismatched types earlier.  */
+		  gcc_assert (
+		    useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
+		  dataref_ptr
+		    = vect_create_data_ref_ptr (vinfo, first_stmt_info,
+						aggr_type, NULL, offset, &dummy,
+						gsi, &ptr_incr, false, bump);
+		}
 	    }
-	  else
+	  else if (!costing_p)
 	    {
 	      gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
 	      /* DR_CHAIN is then used as an input to
@@ -8917,6 +8942,15 @@  vectorizable_store (vec_info *vinfo,
 					     stmt_info, bump);
 	    }
 
+	  if (costing_p)
+	    {
+	      for (i = 0; i < vec_num; i++)
+		vect_get_store_cost (vinfo, stmt_info, 1,
+				     alignment_support_scheme, misalignment,
+				     &inside_cost, cost_vec);
+	      continue;
+	    }
+
 	  /* Get an array into which we can store the individual vectors.  */
 	  tree vec_array = create_vector_array (vectype, vec_num);
 
@@ -9003,6 +9037,12 @@  vectorizable_store (vec_info *vinfo,
 	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
 	}
 
+      if (costing_p && dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "vect_model_store_cost: inside_cost = %d, "
+			 "prologue_cost = %d .\n",
+			 inside_cost, prologue_cost);
+
       return true;
     }