diff mbox

[PR66851] Handle double reduction in parloops

Message ID 55A3D170.6080304@mentor.com
State New
Headers show

Commit Message

Tom de Vries July 13, 2015, 2:55 p.m. UTC
Hi,

this patch fixes PR66851.

In parloops, we manage to parallelize outer loops, but not if the inner 
loop contains a reduction. There is an xfail in autopar/outer-4.c for this:
...
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 
"parloops" { xfail *-*-* } } } */
...

This patch allows outer loops with a reduction in the inner loop to be 
parallelized.

Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom

Comments

Tom de Vries July 24, 2015, 10:30 a.m. UTC | #1
On 13/07/15 16:55, Tom de Vries wrote:
> Hi,
>
> this patch fixes PR66851.
>
> In parloops, we manage to parallelize outer loops, but not if the inner
> loop contains a reduction. There is an xfail in autopar/outer-4.c for this:
> ...
> /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1
> "parloops" { xfail *-*-* } } } */
> ...
>
> This patch allows outer loops with a reduction in the inner loop to be
> parallelized.
>
> Bootstrapped and reg-tested on x86_64.
>
> OK for trunk?
>

Ping (original posting at
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg01057.html).

Thanks,
- Tom

> 0001-Handle-double-reduction-in-parloops.patch
>
>
> Handle double reduction in parloops
>
> 2015-07-13  Tom de Vries  <tom@codesourcery.com>
>
> 	PR tree-optimization/66851
> 	* tree-parloops.c (reduc_stmt_res): New function.
> 	(initialize_reductions, add_field_for_reduction)
> 	(create_phi_for_local_result, create_loads_for_reductions)
> 	(create_stores_for_reduction, build_new_reduction): Handle case that
> 	reduc_stmt is a phi.
> 	(gather_scalar_reductions): Allow double_reduc reductions.
>
> 	* gcc.dg/autopar/outer-4.c (parloop): Remove superfluous noinline
> 	attribute.  Remove xfail on scan for parallelizing outer loop.
> 	(main): Remove.
>
> 	* testsuite/libgomp.c/outer-4.c: New test.
> ---
>   gcc/testsuite/gcc.dg/autopar/outer-4.c | 17 ++++------------
>   gcc/tree-parloops.c                    | 37 +++++++++++++++++++++++++---------
>   libgomp/testsuite/libgomp.c/outer-4.c  | 36 +++++++++++++++++++++++++++++++++
>   3 files changed, 68 insertions(+), 22 deletions(-)
>   create mode 100644 libgomp/testsuite/libgomp.c/outer-4.c
>
> diff --git a/gcc/testsuite/gcc.dg/autopar/outer-4.c b/gcc/testsuite/gcc.dg/autopar/outer-4.c
> index 6fd37c5..f435080 100644
> --- a/gcc/testsuite/gcc.dg/autopar/outer-4.c
> +++ b/gcc/testsuite/gcc.dg/autopar/outer-4.c
> @@ -6,15 +6,13 @@ void abort (void);
>   int g_sum=0;
>   int x[500][500];
>
> -__attribute__((noinline))
> -void parloop (int N)
> +void
> +parloop (int N)
>   {
>     int i, j;
>     int sum;
>
> -  /* Double reduction is currently not supported, outer loop is not
> -     parallelized.  Inner reduction is detected, inner loop is
> -     parallelized.  */
> +  /* Double reduction is detected, outer loop is parallelized.  */
>     sum = 0;
>     for (i = 0; i < N; i++)
>       for (j = 0; j < N; j++)
> @@ -23,13 +21,6 @@ void parloop (int N)
>     g_sum = sum;
>   }
>
> -int main(void)
> -{
> -  parloop(500);
> -
> -  return 0;
> -}
> -
>
> -/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
>   /* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
> diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
> index 21ed17b..db7da62 100644
> --- a/gcc/tree-parloops.c
> +++ b/gcc/tree-parloops.c
> @@ -560,6 +560,14 @@ take_address_of (tree obj, tree type, edge entry,
>     return name;
>   }
>
> +static tree
> +reduc_stmt_res (gimple stmt)
> +{
> +  return (gimple_code (stmt) == GIMPLE_PHI
> +	  ? gimple_phi_result (stmt)
> +	  : gimple_assign_lhs (stmt));
> +}
> +
>   /* Callback for htab_traverse.  Create the initialization statement
>      for reduction described in SLOT, and place it at the preheader of
>      the loop described in DATA.  */
> @@ -586,7 +594,7 @@ initialize_reductions (reduction_info **slot, struct loop *loop)
>     c = build_omp_clause (gimple_location (reduc->reduc_stmt),
>   			OMP_CLAUSE_REDUCTION);
>     OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
> -  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
> +  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (reduc_stmt_res (reduc->reduc_stmt));
>
>     init = omp_reduction_init (c, TREE_TYPE (bvar));
>     reduc->init = init;
> @@ -993,7 +1001,7 @@ add_field_for_reduction (reduction_info **slot, tree type)
>   {
>
>     struct reduction_info *const red = *slot;
> -  tree var = gimple_assign_lhs (red->reduc_stmt);
> +  tree var = reduc_stmt_res (red->reduc_stmt);
>     tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
>   			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
>
> @@ -1053,12 +1061,12 @@ create_phi_for_local_result (reduction_info **slot, struct loop *loop)
>       e = EDGE_PRED (store_bb, 1);
>     else
>       e = EDGE_PRED (store_bb, 0);
> -  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
> +  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
> +  local_res = copy_ssa_name (lhs);
>     locus = gimple_location (reduc->reduc_stmt);
>     new_phi = create_phi_node (local_res, store_bb);
>     add_phi_arg (new_phi, reduc->init, e, locus);
> -  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
> -	       FALLTHRU_EDGE (loop->latch), locus);
> +  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (loop->latch), locus);
>     reduc->new_phi = new_phi;
>
>     return 1;
> @@ -1151,7 +1159,7 @@ create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
>     struct reduction_info *const red = *slot;
>     gimple stmt;
>     gimple_stmt_iterator gsi;
> -  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
> +  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
>     tree load_struct;
>     tree name;
>     tree x;
> @@ -1212,7 +1220,7 @@ create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
>     tree t;
>     gimple stmt;
>     gimple_stmt_iterator gsi;
> -  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
> +  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
>
>     gsi = gsi_last_bb (clsn_data->store_bb);
>     t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
> @@ -2321,6 +2329,7 @@ build_new_reduction (reduction_info_table_type *reduction_list,
>   {
>     reduction_info **slot;
>     struct reduction_info *new_reduction;
> +  enum tree_code reduction_code;
>
>     gcc_assert (reduc_stmt);
>
> @@ -2332,12 +2341,22 @@ build_new_reduction (reduction_info_table_type *reduction_list,
>         fprintf (dump_file, "\n");
>       }
>
> +  if (gimple_code (reduc_stmt) == GIMPLE_PHI)
> +    {
> +      tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
> +      gimple def1 = SSA_NAME_DEF_STMT (op1);
> +      reduction_code = gimple_assign_rhs_code (def1);
> +    }
> +
> +  else
> +    reduction_code = gimple_assign_rhs_code (reduc_stmt);
> +
>     new_reduction = XCNEW (struct reduction_info);
>
>     new_reduction->reduc_stmt = reduc_stmt;
>     new_reduction->reduc_phi = phi;
>     new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
> -  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
> +  new_reduction->reduction_code = reduction_code;
>     slot = reduction_list->find_slot (new_reduction, INSERT);
>     *slot = new_reduction;
>   }
> @@ -2378,7 +2397,7 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
>              gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
>   							    phi, true,
>   							    &double_reduc);
> -	   if (reduc_stmt && !double_reduc)
> +	   if (reduc_stmt)
>                 build_new_reduction (reduction_list, reduc_stmt, phi);
>           }
>       }
> diff --git a/libgomp/testsuite/libgomp.c/outer-4.c b/libgomp/testsuite/libgomp.c/outer-4.c
> new file mode 100644
> index 0000000..f77f634
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/outer-4.c
> @@ -0,0 +1,36 @@
> +/* { dg-do run } */
> +/* { dg-additional-options "-ftree-parallelize-loops=2" } */
> +
> +void abort (void);
> +
> +int g_sum = 1;
> +
> +int x[500][500];
> +
> +void __attribute__((noinline,noclone))
> +parloop (int N)
> +{
> +  int i, j;
> +  int sum;
> +
> +  /* Double reduction is detected, outer loop is parallelized.  */
> +  sum = 0;
> +  for (i = 0; i < N; i++)
> +    for (j = 0; j < N; j++)
> +      sum += x[i][j];
> +
> +  g_sum = sum;
> +}
> +
> +int
> +main (void)
> +{
> +  x[234][432] = 2;
> +
> +  parloop (500);
> +
> +  if (g_sum != 2)
> +    abort ();
> +
> +  return 0;
> +}
> -- 1.9.1
>
diff mbox

Patch

Handle double reduction in parloops

2015-07-13  Tom de Vries  <tom@codesourcery.com>

	PR tree-optimization/66851
	* tree-parloops.c (reduc_stmt_res): New function.
	(initialize_reductions, add_field_for_reduction)
	(create_phi_for_local_result, create_loads_for_reductions)
	(create_stores_for_reduction, build_new_reduction): Handle case that
	reduc_stmt is a phi.
	(gather_scalar_reductions): Allow double_reduc reductions.

	* gcc.dg/autopar/outer-4.c (parloop): Remove superfluous noinline
	attribute.  Remove xfail on scan for parallelizing outer loop.
	(main): Remove.

	* testsuite/libgomp.c/outer-4.c: New test.
---
 gcc/testsuite/gcc.dg/autopar/outer-4.c | 17 ++++------------
 gcc/tree-parloops.c                    | 37 +++++++++++++++++++++++++---------
 libgomp/testsuite/libgomp.c/outer-4.c  | 36 +++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 22 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.c/outer-4.c

diff --git a/gcc/testsuite/gcc.dg/autopar/outer-4.c b/gcc/testsuite/gcc.dg/autopar/outer-4.c
index 6fd37c5..f435080 100644
--- a/gcc/testsuite/gcc.dg/autopar/outer-4.c
+++ b/gcc/testsuite/gcc.dg/autopar/outer-4.c
@@ -6,15 +6,13 @@  void abort (void);
 int g_sum=0;
 int x[500][500];
 
-__attribute__((noinline))
-void parloop (int N)
+void
+parloop (int N)
 {
   int i, j;
   int sum;
 
-  /* Double reduction is currently not supported, outer loop is not 
-     parallelized.  Inner reduction is detected, inner loop is 
-     parallelized.  */
+  /* Double reduction is detected, outer loop is parallelized.  */
   sum = 0;
   for (i = 0; i < N; i++)
     for (j = 0; j < N; j++)
@@ -23,13 +21,6 @@  void parloop (int N)
   g_sum = sum;
 }
 
-int main(void)
-{
-  parloop(500);
-
-  return 0;
-}
-
 
-/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
 /* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 21ed17b..db7da62 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -560,6 +560,14 @@  take_address_of (tree obj, tree type, edge entry,
   return name;
 }
 
+static tree
+reduc_stmt_res (gimple stmt)
+{
+  return (gimple_code (stmt) == GIMPLE_PHI
+	  ? gimple_phi_result (stmt)
+	  : gimple_assign_lhs (stmt));
+}
+
 /* Callback for htab_traverse.  Create the initialization statement
    for reduction described in SLOT, and place it at the preheader of
    the loop described in DATA.  */
@@ -586,7 +594,7 @@  initialize_reductions (reduction_info **slot, struct loop *loop)
   c = build_omp_clause (gimple_location (reduc->reduc_stmt),
 			OMP_CLAUSE_REDUCTION);
   OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
-  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
+  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (reduc_stmt_res (reduc->reduc_stmt));
 
   init = omp_reduction_init (c, TREE_TYPE (bvar));
   reduc->init = init;
@@ -993,7 +1001,7 @@  add_field_for_reduction (reduction_info **slot, tree type)
 {
 
   struct reduction_info *const red = *slot;
-  tree var = gimple_assign_lhs (red->reduc_stmt);
+  tree var = reduc_stmt_res (red->reduc_stmt);
   tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
 			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
 
@@ -1053,12 +1061,12 @@  create_phi_for_local_result (reduction_info **slot, struct loop *loop)
     e = EDGE_PRED (store_bb, 1);
   else
     e = EDGE_PRED (store_bb, 0);
-  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
+  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
+  local_res = copy_ssa_name (lhs);
   locus = gimple_location (reduc->reduc_stmt);
   new_phi = create_phi_node (local_res, store_bb);
   add_phi_arg (new_phi, reduc->init, e, locus);
-  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
-	       FALLTHRU_EDGE (loop->latch), locus);
+  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (loop->latch), locus);
   reduc->new_phi = new_phi;
 
   return 1;
@@ -1151,7 +1159,7 @@  create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
   struct reduction_info *const red = *slot;
   gimple stmt;
   gimple_stmt_iterator gsi;
-  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
+  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
   tree load_struct;
   tree name;
   tree x;
@@ -1212,7 +1220,7 @@  create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
   tree t;
   gimple stmt;
   gimple_stmt_iterator gsi;
-  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
+  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
 
   gsi = gsi_last_bb (clsn_data->store_bb);
   t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
@@ -2321,6 +2329,7 @@  build_new_reduction (reduction_info_table_type *reduction_list,
 {
   reduction_info **slot;
   struct reduction_info *new_reduction;
+  enum tree_code reduction_code;
 
   gcc_assert (reduc_stmt);
 
@@ -2332,12 +2341,22 @@  build_new_reduction (reduction_info_table_type *reduction_list,
       fprintf (dump_file, "\n");
     }
 
+  if (gimple_code (reduc_stmt) == GIMPLE_PHI)
+    {
+      tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
+      gimple def1 = SSA_NAME_DEF_STMT (op1);
+      reduction_code = gimple_assign_rhs_code (def1);
+    }
+
+  else
+    reduction_code = gimple_assign_rhs_code (reduc_stmt);
+
   new_reduction = XCNEW (struct reduction_info);
 
   new_reduction->reduc_stmt = reduc_stmt;
   new_reduction->reduc_phi = phi;
   new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
-  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
+  new_reduction->reduction_code = reduction_code;
   slot = reduction_list->find_slot (new_reduction, INSERT);
   *slot = new_reduction;
 }
@@ -2378,7 +2397,7 @@  gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
            gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
 							    phi, true,
 							    &double_reduc);
-	   if (reduc_stmt && !double_reduc)
+	   if (reduc_stmt)
               build_new_reduction (reduction_list, reduc_stmt, phi);
         }
     }
diff --git a/libgomp/testsuite/libgomp.c/outer-4.c b/libgomp/testsuite/libgomp.c/outer-4.c
new file mode 100644
index 0000000..f77f634
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/outer-4.c
@@ -0,0 +1,36 @@ 
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=2" } */
+
+void abort (void);
+
+int g_sum = 1;
+
+int x[500][500];
+
+void __attribute__((noinline,noclone))
+parloop (int N)
+{
+  int i, j;
+  int sum;
+
+  /* Double reduction is detected, outer loop is parallelized.  */
+  sum = 0;
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      sum += x[i][j];
+
+  g_sum = sum;
+}
+
+int
+main (void)
+{
+  x[234][432] = 2;
+
+  parloop (500);
+
+  if (g_sum != 2)
+    abort ();
+
+  return 0;
+}
-- 
1.9.1