diff mbox

[gomp4,committed] Neuter gang-single code in gang-redundant mode

Message ID 561C1336.2050401@mentor.com
State New
Headers show

Commit Message

Tom de Vries Oct. 12, 2015, 8:08 p.m. UTC
Hi,

ATM stores in the non-loop parts of an oacc kernels region are executed 
by all gangs.

This patch makes sure that those stores are only executed by gang 0. I'm 
not aware atm of any related failing test cases, but it at least reduces 
the amount of stores performed.

Bootstrapped and reg-tested on x86_64.

Committed to gomp-4_0-branch.

Thanks,
- Tom
diff mbox

Patch

Neuter gang-single code in gang-redundant mode

2015-10-12  Tom de Vries  <tom@codesourcery.com>

	* omp-low.c (is_oacc_kernels): New function.
	(lower_omp_target): Insert gang-pos at start of kernels region.
	(execute_oacc_device_lower): Handle IFN_GOACC_DIM_POS without result.
	* tree-parloops.c (create_parallel_loop): Don't expect
	single_pred_p (bb) if oacc_kernels_p.
	(oacc_entry_exit_ok_1): Add and handle reduction_stores parameter.
	(oacc_entry_exit_single_gang): New function.
	(oacc_entry_exit_ok): Call oacc_entry_exit_single_gang.
---
 gcc/omp-low.c       |  27 ++++++++--
 gcc/tree-parloops.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 159 insertions(+), 7 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index e700dd1..df08c2c 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -294,6 +294,17 @@  is_oacc_parallel (omp_context *ctx)
 	      == GF_OMP_TARGET_KIND_OACC_PARALLEL));
 }
 
+/* Return true if CTX corresponds to an oacc kernels region.  */
+
+static bool
+is_oacc_kernels (omp_context *ctx)
+{
+  enum gimple_code outer_type = gimple_code (ctx->stmt);
+  return ((outer_type == GIMPLE_OMP_TARGET)
+	  && (gimple_omp_target_kind (ctx->stmt)
+	      == GF_OMP_TARGET_KIND_OACC_KERNELS));
+}
+
 /* Return true if VAR is a is private reduction variable.  A reduction
    variable is considered private if the variable is local to the
    offloaded region, or if it is the first reduction to use a mapped
@@ -12962,6 +12973,13 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 
   if (offloaded)
     {
+      if (is_oacc_kernels (ctx))
+	{
+	  tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
+	  gcall *gang_single
+	    = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
+	  gimple_seq_add_stmt (&new_body, gang_single);
+	}
       gimple_seq_add_stmt (&new_body, gimple_build_omp_entry_end ());
       if (ctx->reductions)
 	{
@@ -15696,7 +15714,9 @@  execute_oacc_device_lower ()
 
 	  case IFN_GOACC_DIM_POS:
 	  case IFN_GOACC_DIM_SIZE:
-	    if (oacc_xform_dim (call, dims, ifn_code == IFN_GOACC_DIM_POS))
+	    if (gimple_call_lhs (call) == NULL_TREE)
+	      rescan = -1;
+	    else if (oacc_xform_dim (call, dims, ifn_code == IFN_GOACC_DIM_POS))
 	      rescan = 1;
 	    break;
 
@@ -15740,8 +15760,9 @@  execute_oacc_device_lower ()
 	  gsi_next (&gsi);
 	else if (rescan < 0)
 	  {
-	    replace_uses_by (gimple_vdef (call),
-			     gimple_vuse (call));
+	    if (gimple_vdef (call))
+	      replace_uses_by (gimple_vdef (call),
+			       gimple_vuse (call));
 	    gsi_remove (&gsi, true);
 	  }
       }
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index d4eb32a..0ac416d 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2031,9 +2031,11 @@  create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
 
   /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
   bb = loop_preheader_edge (loop)->src;
-  paral_bb = single_pred (bb);
   if (!oacc_kernels_p)
-    gsi = gsi_last_bb (paral_bb);
+    {
+      paral_bb = single_pred (bb);
+      gsi = gsi_last_bb (paral_bb);
+    }
   else
     /* Make sure the oacc parallel is inserted on top of the oacc kernels
        region.  */
@@ -2859,7 +2861,8 @@  ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
 static bool
 oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
 		      tree omp_data_i,
-		      reduction_info_table_type *reduction_list)
+		      reduction_info_table_type *reduction_list,
+		      bitmap reduction_stores)
 {
   unsigned i;
   basic_block bb;
@@ -2919,6 +2922,9 @@  oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
 			  single_imm_use (val, &use_p, &use_stmt);
 			  if (gimple_store_p (use_stmt))
 			    {
+			      unsigned int id
+				= SSA_NAME_VERSION (gimple_vdef (use_stmt));
+			      bitmap_set_bit (reduction_stores, id);
 			      skip_stmt = use_stmt;
 			      if (dump_file)
 				{
@@ -2948,6 +2954,9 @@  oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
 		   && !gimple_vdef (stmt)
 		   && !gimple_vuse (stmt))
 	    continue;
+	  else if (gimple_call_internal_p (stmt)
+		   && gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS)
+	    continue;
 	  else
 	    {
 	      if (dump_file)
@@ -2974,6 +2983,106 @@  oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
   return true;
 }
 
+/* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
+   GANG_POS == 0, except when the stores are REDUCTION_STORES.  Return true
+   if any changes were made.  */
+
+static bool
+oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
+			     bitmap reduction_stores, tree gang_pos)
+{
+  bool changed = false;
+
+  unsigned i;
+  basic_block bb;
+  FOR_EACH_VEC_ELT (region_bbs, i, bb)
+    {
+      if (bitmap_bit_p (in_loop_bbs, bb->index))
+	continue;
+
+      gimple_stmt_iterator gsi;
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
+	{
+	  gimple *stmt = gsi_stmt (gsi);
+
+	  if (!gimple_store_p (stmt))
+	    {
+	      /* Update gsi to point to next stmt.  */
+	      gsi_next (&gsi);
+	      continue;
+	    }
+
+	  if (bitmap_bit_p (reduction_stores,
+			    SSA_NAME_VERSION (gimple_vdef (stmt))))
+	    {
+	      if (dump_file)
+		{
+		  fprintf (dump_file,
+			   "skipped reduction store for single-gang"
+			   " neutering: ");
+		  print_gimple_stmt (dump_file, stmt, 0, 0);
+		}
+
+	      /* Update gsi to point to next stmt.  */
+	      gsi_next (&gsi);
+	      continue;
+	    }
+
+	  changed = true;
+
+	  if (dump_file)
+	    {
+	      fprintf (dump_file,
+		       "found store that needs single-gang neutering: ");
+	      print_gimple_stmt (dump_file, stmt, 0, 0);
+	    }
+
+	  {
+	    /* Split block before store.  */
+	    gimple_stmt_iterator gsi2 = gsi;
+	    gsi_prev (&gsi2);
+	    edge e;
+	    if (gsi_end_p (gsi2))
+	      {
+		e = split_block_after_labels (bb);
+		gsi2 = gsi_last_bb (bb);
+	      }
+	    else
+	      e = split_block (bb, gsi_stmt (gsi2));
+	    basic_block bb2 = e->dest;
+
+	    /* Split block after store.  */
+	    gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
+	    edge e2 = split_block (bb2, gsi_stmt (gsi3));
+	    basic_block bb3 = e2->dest;
+
+	    gimple *cond
+	      = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
+				   NULL_TREE, NULL_TREE);
+	    gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);
+
+	    edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
+	    e->flags = EDGE_TRUE_VALUE;
+
+	    tree vdef = gimple_vdef (stmt);
+	    tree vuse = gimple_vuse (stmt);
+
+	    tree phi_res = copy_ssa_name (vdef);
+	    gphi *new_phi = create_phi_node (phi_res, bb3);
+	    replace_uses_by (vdef, phi_res);
+	    add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
+	    add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);
+
+	    /* Update gsi to point to next stmt.  */
+	    bb = bb3;
+	    gsi = gsi_start_bb (bb);
+	  }
+	}
+    }
+
+  return changed;
+}
+
 static bool
 oacc_entry_exit_ok (struct loop *loop, basic_block region_entry,
 		    reduction_info_table_type *reduction_list)
@@ -2986,17 +3095,39 @@  oacc_entry_exit_ok (struct loop *loop, basic_block region_entry,
   tree omp_data_i = get_omp_data_i (region_entry);
   gcc_assert (omp_data_i != NULL_TREE);
 
+  gimple_stmt_iterator gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (omp_data_i));
+  gsi_next_nondebug (&gsi);
+  gimple *stmt = gsi_stmt (gsi);
+  gcc_assert (gimple_call_internal_p (stmt)
+	      && gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS);
+  tree gang_pos = make_ssa_name (integer_type_node);
+  gimple_call_set_lhs (stmt, gang_pos);
+
   bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
   bitmap_clear (in_loop_bbs);
   for (unsigned int i = 0; i < loop->num_nodes; i++)
     bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
 
+  bitmap reduction_stores = BITMAP_ALLOC (NULL);
   bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, omp_data_i,
-				   reduction_list);
+				   reduction_list, reduction_stores);
+
+  if (res)
+    {
+      bool changed
+	= oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
+				       reduction_stores, gang_pos);
+      if (changed)
+	{
+	  free_dominance_info (CDI_DOMINATORS);
+	  calculate_dominance_info (CDI_DOMINATORS);
+	}
+    }
 
   free (loop_bbs);
 
   BITMAP_FREE (in_loop_bbs);
+  BITMAP_FREE (reduction_stores);
 
   return res;
 }
-- 
1.9.1