diff mbox series

[1/6] Fix vectorizable_conversion costs

Message ID mpt7e4etn78.fsf@arm.com
State New
Headers show
Series Optionally pick the cheapest loop_vec_info | expand

Commit Message

Richard Sandiford Nov. 5, 2019, 2:25 p.m. UTC
This patch makes two tweaks to vectorizable_conversion.  The first
is to use "modifier" to distinguish between promotion, demotion,
and neither promotion nor demotion, rather than using a code for
some cases and "modifier" for others.  The second is to take ncopies
into account for the promotion and demotion costs; previously we gave
multiple copies the same cost as a single copy.

Later patches test this, but it seemed worth splitting out.


2019-11-05  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-stmts.c (vect_model_promotion_demotion_cost): Take the
	number of ncopies as an additional argument.
	(vectorizable_conversion): Update call accordingly.  Use "modifier"
	to check whether a conversion is between vectors with the same
	numbers of units.

Comments

Richard Biener Nov. 6, 2019, 12:01 p.m. UTC | #1
On Tue, Nov 5, 2019 at 3:25 PM Richard Sandiford
<Richard.Sandiford@arm.com> wrote:
>
> This patch makes two tweaks to vectorizable_conversion.  The first
> is to use "modifier" to distinguish between promotion, demotion,
> and neither promotion nor demotion, rather than using a code for
> some cases and "modifier" for others.  The second is to take ncopies
> into account for the promotion and demotion costs; previously we gave
> multiple copies the same cost as a single copy.
>
> Later patches test this, but it seemed worth splitting out.

OK, but does ncopies properly handle unrolling with SLP?

Richard.

>
> 2019-11-05  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         * tree-vect-stmts.c (vect_model_promotion_demotion_cost): Take the
>         number of ncopies as an additional argument.
>         (vectorizable_conversion): Update call accordingly.  Use "modifier"
>         to check whether a conversion is between vectors with the same
>         numbers of units.
>
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c       2019-11-05 11:08:12.521631453 +0000
> +++ gcc/tree-vect-stmts.c       2019-11-05 14:17:43.330141911 +0000
> @@ -917,26 +917,27 @@ vect_model_simple_cost (stmt_vec_info st
>  }
>
>
> -/* Model cost for type demotion and promotion operations.  PWR is normally
> -   zero for single-step promotions and demotions.  It will be one if
> -   two-step promotion/demotion is required, and so on.  Each additional
> +/* Model cost for type demotion and promotion operations.  PWR is
> +   normally zero for single-step promotions and demotions.  It will be
> +   one if two-step promotion/demotion is required, and so on.  NCOPIES
> +   is the number of vector results (and thus number of instructions)
> +   for the narrowest end of the operation chain.  Each additional
>     step doubles the number of instructions required.  */
>
>  static void
>  vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
> -                                   enum vect_def_type *dt, int pwr,
> +                                   enum vect_def_type *dt,
> +                                   unsigned int ncopies, int pwr,
>                                     stmt_vector_for_cost *cost_vec)
>  {
> -  int i, tmp;
> +  int i;
>    int inside_cost = 0, prologue_cost = 0;
>
>    for (i = 0; i < pwr + 1; i++)
>      {
> -      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
> -       (i + 1) : i;
> -      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
> -                                      vec_promote_demote, stmt_info, 0,
> -                                      vect_body);
> +      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
> +                                      stmt_info, 0, vect_body);
> +      ncopies *= 2;
>      }
>
>    /* FORNOW: Assuming maximum 2 args per stmts.  */
> @@ -4981,7 +4982,7 @@ vectorizable_conversion (stmt_vec_info s
>    if (!vec_stmt)               /* transformation not required.  */
>      {
>        DUMP_VECT_SCOPE ("vectorizable_conversion");
> -      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
> +      if (modifier == NONE)
>          {
>           STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
>           vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
> @@ -4990,14 +4991,17 @@ vectorizable_conversion (stmt_vec_info s
>        else if (modifier == NARROW)
>         {
>           STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
> -         vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
> -                                             cost_vec);
> +         /* The final packing step produces one vector result per copy.  */
> +         vect_model_promotion_demotion_cost (stmt_info, dt, ncopies,
> +                                             multi_step_cvt, cost_vec);
>         }
>        else
>         {
>           STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
> -         vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
> -                                             cost_vec);
> +         /* The initial unpacking step produces two vector results
> +            per copy.  */
> +         vect_model_promotion_demotion_cost (stmt_info, dt, ncopies * 2,
> +                                             multi_step_cvt, cost_vec);
>         }
>        interm_types.release ();
>        return true;
Richard Sandiford Nov. 7, 2019, 3:14 p.m. UTC | #2
Richard Biener <richard.guenther@gmail.com> writes:
> On Tue, Nov 5, 2019 at 3:25 PM Richard Sandiford
> <Richard.Sandiford@arm.com> wrote:
>>
>> This patch makes two tweaks to vectorizable_conversion.  The first
>> is to use "modifier" to distinguish between promotion, demotion,
>> and neither promotion nor demotion, rather than using a code for
>> some cases and "modifier" for others.  The second is to take ncopies
>> into account for the promotion and demotion costs; previously we gave
>> multiple copies the same cost as a single copy.
>>
>> Later patches test this, but it seemed worth splitting out.
>
> OK, but does ncopies properly handle unrolling with SLP?

Bah, thanks for catching that.  Here's a fixed version, tested as before.

Richard


2019-11-07  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-stmts.c (vect_model_promotion_demotion_cost): Take the
	number of ncopies as an additional argument.
	(vectorizable_conversion): Update call accordingly.  Use "modifier"
	to check whether a conversion is between vectors with the same
	numbers of units.

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	2019-11-07 15:11:49.000000000 +0000
+++ gcc/tree-vect-stmts.c	2019-11-07 15:11:50.134775028 +0000
@@ -917,26 +917,27 @@ vect_model_simple_cost (stmt_vec_info st
 }
 
 
-/* Model cost for type demotion and promotion operations.  PWR is normally
-   zero for single-step promotions and demotions.  It will be one if 
-   two-step promotion/demotion is required, and so on.  Each additional
+/* Model cost for type demotion and promotion operations.  PWR is
+   normally zero for single-step promotions and demotions.  It will be
+   one if two-step promotion/demotion is required, and so on.  NCOPIES
+   is the number of vector results (and thus number of instructions)
+   for the narrowest end of the operation chain.  Each additional
    step doubles the number of instructions required.  */
 
 static void
 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
-				    enum vect_def_type *dt, int pwr,
+				    enum vect_def_type *dt,
+				    unsigned int ncopies, int pwr,
 				    stmt_vector_for_cost *cost_vec)
 {
-  int i, tmp;
+  int i;
   int inside_cost = 0, prologue_cost = 0;
 
   for (i = 0; i < pwr + 1; i++)
     {
-      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
-	(i + 1) : i;
-      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
-				       vec_promote_demote, stmt_info, 0,
-				       vect_body);
+      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
+				       stmt_info, 0, vect_body);
+      ncopies *= 2;
     }
 
   /* FORNOW: Assuming maximum 2 args per stmts.  */
@@ -4964,7 +4965,7 @@ vectorizable_conversion (stmt_vec_info s
   if (!vec_stmt)		/* transformation not required.  */
     {
       DUMP_VECT_SCOPE ("vectorizable_conversion");
-      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
+      if (modifier == NONE)
         {
 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
@@ -4973,14 +4974,24 @@ vectorizable_conversion (stmt_vec_info s
       else if (modifier == NARROW)
 	{
 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
-	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
-					      cost_vec);
+	  /* The final packing step produces one vector result per copy.  */
+	  unsigned int nvectors
+	    = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
+	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
+					      multi_step_cvt, cost_vec);
 	}
       else
 	{
 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
-	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
-					      cost_vec);
+	  /* The initial unpacking step produces two vector results
+	     per copy.  MULTI_STEP_CVT is 0 for a single conversion,
+	     so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
+	  unsigned int nvectors
+	    = (slp_node
+	       ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
+	       : ncopies * 2);
+	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
+					      multi_step_cvt, cost_vec);
 	}
       interm_types.release ();
       return true;
Richard Biener Nov. 7, 2019, 4:13 p.m. UTC | #3
On November 7, 2019 4:14:14 PM GMT+01:00, Richard Sandiford <richard.sandiford@arm.com> wrote:
>Richard Biener <richard.guenther@gmail.com> writes:
>> On Tue, Nov 5, 2019 at 3:25 PM Richard Sandiford
>> <Richard.Sandiford@arm.com> wrote:
>>>
>>> This patch makes two tweaks to vectorizable_conversion.  The first
>>> is to use "modifier" to distinguish between promotion, demotion,
>>> and neither promotion nor demotion, rather than using a code for
>>> some cases and "modifier" for others.  The second is to take ncopies
>>> into account for the promotion and demotion costs; previously we gave
>>> multiple copies the same cost as a single copy.
>>>
>>> Later patches test this, but it seemed worth splitting out.
>>
>> OK, but does ncopies properly handle unrolling with SLP?
>
>Bah, thanks for catching that.  Here's a fixed version, tested as
>before.

OK. 

Thanks, 
Richard. 

>Richard
>
>
>2019-11-07  Richard Sandiford  <richard.sandiford@arm.com>
>
>gcc/
>	* tree-vect-stmts.c (vect_model_promotion_demotion_cost): Take the
>	number of ncopies as an additional argument.
>	(vectorizable_conversion): Update call accordingly.  Use "modifier"
>	to check whether a conversion is between vectors with the same
>	numbers of units.
>
>Index: gcc/tree-vect-stmts.c
>===================================================================
>--- gcc/tree-vect-stmts.c	2019-11-07 15:11:49.000000000 +0000
>+++ gcc/tree-vect-stmts.c	2019-11-07 15:11:50.134775028 +0000
>@@ -917,26 +917,27 @@ vect_model_simple_cost (stmt_vec_info st
> }
> 
> 
>-/* Model cost for type demotion and promotion operations.  PWR is normally
>-   zero for single-step promotions and demotions.  It will be one if 
>-   two-step promotion/demotion is required, and so on.  Each additional
>+/* Model cost for type demotion and promotion operations.  PWR is
>+   normally zero for single-step promotions and demotions.  It will be
>+   one if two-step promotion/demotion is required, and so on.  NCOPIES
>+   is the number of vector results (and thus number of instructions)
>+   for the narrowest end of the operation chain.  Each additional
>    step doubles the number of instructions required.  */
> 
> static void
> vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
>-				    enum vect_def_type *dt, int pwr,
>+				    enum vect_def_type *dt,
>+				    unsigned int ncopies, int pwr,
> 				    stmt_vector_for_cost *cost_vec)
> {
>-  int i, tmp;
>+  int i;
>   int inside_cost = 0, prologue_cost = 0;
> 
>   for (i = 0; i < pwr + 1; i++)
>     {
>-      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
>-	(i + 1) : i;
>-      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
>-				       vec_promote_demote, stmt_info, 0,
>-				       vect_body);
>+      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
>+				       stmt_info, 0, vect_body);
>+      ncopies *= 2;
>     }
> 
>   /* FORNOW: Assuming maximum 2 args per stmts.  */
>@@ -4964,7 +4965,7 @@ vectorizable_conversion (stmt_vec_info s
>   if (!vec_stmt)		/* transformation not required.  */
>     {
>       DUMP_VECT_SCOPE ("vectorizable_conversion");
>-      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
>+      if (modifier == NONE)
>         {
> 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
> 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
>@@ -4973,14 +4974,24 @@ vectorizable_conversion (stmt_vec_info s
>       else if (modifier == NARROW)
> 	{
> 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
>-	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
>-					      cost_vec);
>+	  /* The final packing step produces one vector result per copy.  */
>+	  unsigned int nvectors
>+	    = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
>+	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
>+					      multi_step_cvt, cost_vec);
> 	}
>       else
> 	{
> 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
>-	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
>-					      cost_vec);
>+	  /* The initial unpacking step produces two vector results
>+	     per copy.  MULTI_STEP_CVT is 0 for a single conversion,
>+	     so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
>+	  unsigned int nvectors
>+	    = (slp_node
>+	       ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
>+	       : ncopies * 2);
>+	  vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
>+					      multi_step_cvt, cost_vec);
> 	}
>       interm_types.release ();
>       return true;
diff mbox series

Patch

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	2019-11-05 11:08:12.521631453 +0000
+++ gcc/tree-vect-stmts.c	2019-11-05 14:17:43.330141911 +0000
@@ -917,26 +917,27 @@  vect_model_simple_cost (stmt_vec_info st
 }
 
 
-/* Model cost for type demotion and promotion operations.  PWR is normally
-   zero for single-step promotions and demotions.  It will be one if 
-   two-step promotion/demotion is required, and so on.  Each additional
+/* Model cost for type demotion and promotion operations.  PWR is
+   normally zero for single-step promotions and demotions.  It will be
+   one if two-step promotion/demotion is required, and so on.  NCOPIES
+   is the number of vector results (and thus number of instructions)
+   for the narrowest end of the operation chain.  Each additional
    step doubles the number of instructions required.  */
 
 static void
 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
-				    enum vect_def_type *dt, int pwr,
+				    enum vect_def_type *dt,
+				    unsigned int ncopies, int pwr,
 				    stmt_vector_for_cost *cost_vec)
 {
-  int i, tmp;
+  int i;
   int inside_cost = 0, prologue_cost = 0;
 
   for (i = 0; i < pwr + 1; i++)
     {
-      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
-	(i + 1) : i;
-      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
-				       vec_promote_demote, stmt_info, 0,
-				       vect_body);
+      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
+				       stmt_info, 0, vect_body);
+      ncopies *= 2;
     }
 
   /* FORNOW: Assuming maximum 2 args per stmts.  */
@@ -4981,7 +4982,7 @@  vectorizable_conversion (stmt_vec_info s
   if (!vec_stmt)		/* transformation not required.  */
     {
       DUMP_VECT_SCOPE ("vectorizable_conversion");
-      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
+      if (modifier == NONE)
         {
 	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
 	  vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
@@ -4990,14 +4991,17 @@  vectorizable_conversion (stmt_vec_info s
       else if (modifier == NARROW)
 	{
 	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
-	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
-					      cost_vec);
+	  /* The final packing step produces one vector result per copy.  */
+	  vect_model_promotion_demotion_cost (stmt_info, dt, ncopies,
+					      multi_step_cvt, cost_vec);
 	}
       else
 	{
 	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
-	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
-					      cost_vec);
+	  /* The initial unpacking step produces two vector results
+	     per copy.  */
+	  vect_model_promotion_demotion_cost (stmt_info, dt, ncopies * 2,
+					      multi_step_cvt, cost_vec);
 	}
       interm_types.release ();
       return true;