diff mbox series

[02/10] vect: Create array_slice of live-out stmts

Message ID mpteec9rncw.fsf@arm.com
State New
Headers show
Series [01/10] vect: Simplify epilogue reduction code | expand

Commit Message

Richard Sandiford July 8, 2021, 12:39 p.m. UTC
This patch constructs an array_slice of the scalar statements that
produce live-out reduction results in the original unvectorised loop.
There are three cases:

- SLP reduction chains: the final SLP stmt is live-out
- full SLP reductions: all SLP stmts are live-out
- non-SLP reductions: the single scalar stmt is live-out

This is a slight simplification on its own, mostly because it means
“group_size” has a consistent meaning throughout the function.
The main justification though is that it helps with later patches.

gcc/
	* tree-vect-loop.c (vect_create_epilog_for_reduction): Truncate
	scalar_results to group_size elements after reducing down from
	N*group_size elements.  Construct an array_slice of the live-out
	stmts and assert that there is one stmt per scalar result.
---
 gcc/tree-vect-loop.c | 61 +++++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 40 deletions(-)

Comments

Richard Biener July 8, 2021, 12:58 p.m. UTC | #1
On Thu, Jul 8, 2021 at 2:42 PM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This patch constructs an array_slice of the scalar statements that
> produce live-out reduction results in the original unvectorised loop.
> There are three cases:
>
> - SLP reduction chains: the final SLP stmt is live-out
> - full SLP reductions: all SLP stmts are live-out
> - non-SLP reductions: the single scalar stmt is live-out
>
> This is a slight simplification on its own, mostly because it means
> “group_size” has a consistent meaning throughout the function.
> The main justification though is that it helps with later patches.

OK

> gcc/
>         * tree-vect-loop.c (vect_create_epilog_for_reduction): Truncate
>         scalar_results to group_size elements after reducing down from
>         N*group_size elements.  Construct an array_slice of the live-out
>         stmts and assert that there is one stmt per scalar result.
> ---
>  gcc/tree-vect-loop.c | 61 +++++++++++++++-----------------------------
>  1 file changed, 21 insertions(+), 40 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 7c3e3352b43..8390ac80ca0 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -5010,7 +5010,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
>    auto_vec<tree> scalar_results;
>    unsigned int group_size = 1, k;
>    auto_vec<gimple *> phis;
> -  bool slp_reduc = false;
> +  /* SLP reduction without reduction chain, e.g.,
> +     # a1 = phi <a2, a0>
> +     # b1 = phi <b2, b0>
> +     a2 = operation (a1)
> +     b2 = operation (b1)  */
> +  bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
>    bool direct_slp_reduc;
>    tree new_phi_result;
>    tree induction_index = NULL_TREE;
> @@ -5050,6 +5055,16 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
>         adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info);
>      }
>
> +  stmt_vec_info single_live_out_stmt[] = { stmt_info };
> +  array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt;
> +  if (slp_reduc)
> +    /* All statements produce live-out values.  */
> +    live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node);
> +  else if (slp_node)
> +    /* The last statement in the reduction chain produces the live-out
> +       value.  */
> +    single_live_out_stmt[0] = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
> +
>    unsigned vec_num;
>    int ncopies;
>    if (slp_node)
> @@ -5248,13 +5263,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
>    new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
>    bitsize = TYPE_SIZE (scalar_type);
>
> -  /* SLP reduction without reduction chain, e.g.,
> -     # a1 = phi <a2, a0>
> -     # b1 = phi <b2, b0>
> -     a2 = operation (a1)
> -     b2 = operation (b1)  */
> -  slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
> -
>    /* True if we should implement SLP_REDUC using native reduction operations
>       instead of scalar operations.  */
>    direct_slp_reduc = (reduc_fn != IFN_LAST
> @@ -5877,6 +5885,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
>                                           first_res, res);
>                    scalar_results[j % group_size] = new_res;
>                  }
> +             scalar_results.truncate (group_size);
>               for (k = 0; k < group_size; k++)
>                 scalar_results[k] = gimple_convert (&stmts, scalar_type,
>                                                     scalar_results[k]);
> @@ -5969,39 +5978,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
>            use <s_out4>
>            use <s_out4> */
>
> -
> -  /* In SLP reduction chain we reduce vector results into one vector if
> -     necessary, hence we set here REDUC_GROUP_SIZE to 1.  SCALAR_DEST is the
> -     LHS of the last stmt in the reduction chain, since we are looking for
> -     the loop exit phi node.  */
> -  if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> -    {
> -      stmt_vec_info dest_stmt_info
> -       = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
> -      scalar_dest = gimple_assign_lhs (dest_stmt_info->stmt);
> -      group_size = 1;
> -    }
> -
> -  /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
> -     case that REDUC_GROUP_SIZE is greater than vectorization factor).
> -     Therefore, we need to match SCALAR_RESULTS with corresponding statements.
> -     The first (REDUC_GROUP_SIZE / number of new vector stmts) scalar results
> -     correspond to the first vector stmt, etc.
> -     (RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)).  */
> -  if (group_size > new_phis.length ())
> -    gcc_assert (!(group_size % new_phis.length ()));
> -
> -  for (k = 0; k < group_size; k++)
> +  gcc_assert (live_out_stmts.size () == scalar_results.length ());
> +  for (k = 0; k < live_out_stmts.size (); k++)
>      {
> -      if (slp_reduc)
> -        {
> -         stmt_vec_info scalar_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[k];
> -
> -         orig_stmt_info = STMT_VINFO_RELATED_STMT (scalar_stmt_info);
> -         /* SLP statements can't participate in patterns.  */
> -         gcc_assert (!orig_stmt_info);
> -         scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt);
> -        }
> +      stmt_vec_info scalar_stmt_info = vect_orig_stmt (live_out_stmts[k]);
> +      scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt);
>
>        phis.create (3);
>        /* Find the loop-closed-use at the loop exit of the original scalar
diff mbox series

Patch

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 7c3e3352b43..8390ac80ca0 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -5010,7 +5010,12 @@  vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
   auto_vec<tree> scalar_results;
   unsigned int group_size = 1, k;
   auto_vec<gimple *> phis;
-  bool slp_reduc = false;
+  /* SLP reduction without reduction chain, e.g.,
+     # a1 = phi <a2, a0>
+     # b1 = phi <b2, b0>
+     a2 = operation (a1)
+     b2 = operation (b1)  */
+  bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
   bool direct_slp_reduc;
   tree new_phi_result;
   tree induction_index = NULL_TREE;
@@ -5050,6 +5055,16 @@  vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
 	adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info);
     }
 
+  stmt_vec_info single_live_out_stmt[] = { stmt_info };
+  array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt;
+  if (slp_reduc)
+    /* All statements produce live-out values.  */
+    live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node);
+  else if (slp_node)
+    /* The last statement in the reduction chain produces the live-out
+       value.  */
+    single_live_out_stmt[0] = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
+
   unsigned vec_num;
   int ncopies;
   if (slp_node)
@@ -5248,13 +5263,6 @@  vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
   bitsize = TYPE_SIZE (scalar_type);
 
-  /* SLP reduction without reduction chain, e.g.,
-     # a1 = phi <a2, a0>
-     # b1 = phi <b2, b0>
-     a2 = operation (a1)
-     b2 = operation (b1)  */
-  slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
-
   /* True if we should implement SLP_REDUC using native reduction operations
      instead of scalar operations.  */
   direct_slp_reduc = (reduc_fn != IFN_LAST
@@ -5877,6 +5885,7 @@  vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
 					  first_res, res);
                   scalar_results[j % group_size] = new_res;
                 }
+	      scalar_results.truncate (group_size);
 	      for (k = 0; k < group_size; k++)
 		scalar_results[k] = gimple_convert (&stmts, scalar_type,
 						    scalar_results[k]);
@@ -5969,39 +5978,11 @@  vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
           use <s_out4>  
           use <s_out4> */
 
-
-  /* In SLP reduction chain we reduce vector results into one vector if
-     necessary, hence we set here REDUC_GROUP_SIZE to 1.  SCALAR_DEST is the
-     LHS of the last stmt in the reduction chain, since we are looking for
-     the loop exit phi node.  */
-  if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
-    {
-      stmt_vec_info dest_stmt_info
-	= vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
-      scalar_dest = gimple_assign_lhs (dest_stmt_info->stmt);
-      group_size = 1;
-    }
-
-  /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
-     case that REDUC_GROUP_SIZE is greater than vectorization factor).
-     Therefore, we need to match SCALAR_RESULTS with corresponding statements.
-     The first (REDUC_GROUP_SIZE / number of new vector stmts) scalar results
-     correspond to the first vector stmt, etc.
-     (RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)).  */
-  if (group_size > new_phis.length ())
-    gcc_assert (!(group_size % new_phis.length ()));
-
-  for (k = 0; k < group_size; k++)
+  gcc_assert (live_out_stmts.size () == scalar_results.length ());
+  for (k = 0; k < live_out_stmts.size (); k++)
     {
-      if (slp_reduc)
-        {
-	  stmt_vec_info scalar_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[k];
-
-	  orig_stmt_info = STMT_VINFO_RELATED_STMT (scalar_stmt_info);
-	  /* SLP statements can't participate in patterns.  */
-	  gcc_assert (!orig_stmt_info);
-	  scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt);
-        }
+      stmt_vec_info scalar_stmt_info = vect_orig_stmt (live_out_stmts[k]);
+      scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt);
 
       phis.create (3);
       /* Find the loop-closed-use at the loop exit of the original scalar