diff mbox

[OpenACC,6/11] Reduction initialization

Message ID 5627E65D.30106@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Oct. 21, 2015, 7:24 p.m. UTC
This patch is a temporary measure to avoid breaking reductions, until I post the 
reductions patch set (which builds on this).

Currently OpenACC reductions are handled by
(a) spawning all threads throughout the offload region
(b) having them each individually write to an allocated slot in a 'reductions 
array', according to their thread number.
(c) having the host collate the reduction values after the region.

This is clearly a rather restricted implementation of reductions.  With loop 
partitioning implemented, not all threads execute though -- in fact, on a loop 
lacking any gang, worker or vector specifier, the loop won't be partitioned 
(until I commit the 'auto' implementation).  This leads to entries in the 
reduction array being uninitialized.

This patch takes the brute-force approach of initializing the reductions array 
on the host before offloading and then copying it to the device.  Thus at the 
end of the region, any slots that weren't used have a sensible initial value 
which will not destroy the reduction result.

This code should be short lived ...

nathan

Comments

Jakub Jelinek Oct. 22, 2015, 8:58 a.m. UTC | #1
On Wed, Oct 21, 2015 at 03:24:13PM -0400, Nathan Sidwell wrote:
> 2015-10-20  Nathan Sidwell  <nathan@codesourcery.com>
> 
> 	* omp-low.c (oacc_init_rediction_array): New.
> 	(oacc_initialize_reduction_data): Initialize array.

Ok.

	Jakub
Nathan Sidwell Oct. 27, 2015, 10:15 p.m. UTC | #2
On 10/22/15 01:58, Jakub Jelinek wrote:
> On Wed, Oct 21, 2015 at 03:24:13PM -0400, Nathan Sidwell wrote:
>> 2015-10-20  Nathan Sidwell  <nathan@codesourcery.com>
>>
>> 	* omp-low.c (oacc_init_rediction_array): New.
>> 	(oacc_initialize_reduction_data): Initialize array.
>
> Ok.

Committed.
diff mbox

Patch

2015-10-20  Nathan Sidwell  <nathan@codesourcery.com>

	* omp-low.c (oacc_init_reduction_array): New.
	(oacc_initialize_reduction_data): Initialize array.

Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 229101)
+++ gcc/omp-low.c	(working copy)
@@ -12202,6 +13008,71 @@  oacc_gimple_assign (tree dest, tree_code
   gimplify_assign (dest, result, seq);
 }
 
+/* Emit to STMT_SEQP a loop storing INIT in all NTHREADS slots of ARRAY.  */
+
+static void
+oacc_init_reduction_array (tree array, tree init, tree nthreads,
+			   gimple_seq *stmt_seqp)
+{
+  tree type = TREE_TYPE (TREE_TYPE (array));
+  tree x, loop_header, loop_body, loop_exit;
+  gimple *stmt;
+
+  /* Create for loop.
+
+     let init = the default value to store in every slot
+     let array = reduction variable array
+
+     for (i = 0; i < nthreads; i++)
+       array[i] = init
+ */
+
+  loop_header = create_artificial_label (UNKNOWN_LOCATION);
+  loop_body = create_artificial_label (UNKNOWN_LOCATION);
+  loop_exit = create_artificial_label (UNKNOWN_LOCATION);
+
+  /* Create the index variable IX (of sizetype) and set it to zero.  */
+  tree ix = create_tmp_var (sizetype);
+  gimplify_assign (ix, fold_build1 (NOP_EXPR, sizetype, integer_zero_node),
+		   stmt_seqp);
+
+  /* Insert the loop header label here; the back edge jumps to it.  */
+  gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_header));
+
+  /* Exit loop if ix >= nthreads (converted to sizetype each iteration).  */
+  x = create_tmp_var (sizetype);
+  gimplify_assign (x, fold_build1 (NOP_EXPR, sizetype, nthreads), stmt_seqp);
+  stmt = gimple_build_cond (GE_EXPR, ix, x, loop_exit, loop_body);
+  gimple_seq_add_stmt (stmt_seqp, stmt);
+
+  /* Insert the loop body label here.  */
+  gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_body));
+
+  /* Calculate the byte offset of slot IX: element size times IX.  */
+  tree offset = create_tmp_var (sizetype);
+  gimplify_assign (offset, TYPE_SIZE_UNIT (type), stmt_seqp);
+  stmt = gimple_build_assign (offset, MULT_EXPR, offset, ix);
+  gimple_seq_add_stmt (stmt_seqp, stmt);
+
+  tree ptr = create_tmp_var (TREE_TYPE (array));
+  stmt = gimple_build_assign (ptr, POINTER_PLUS_EXPR, array, offset);
+  gimple_seq_add_stmt (stmt_seqp, stmt);
+
+  /* Store INIT into the slot at ARRAY + OFFSET.  */
+  gimplify_assign (build_simple_mem_ref (ptr), init, stmt_seqp);
+
+  /* Increment the induction variable.  */
+  tree one = fold_build1 (NOP_EXPR, sizetype, integer_one_node);
+  stmt = gimple_build_assign (ix, PLUS_EXPR, ix, one);
+  gimple_seq_add_stmt (stmt_seqp, stmt);
+
+  /* Go back to the top of the loop.  */
+  gimple_seq_add_stmt (stmt_seqp, gimple_build_goto (loop_header));
+
+  /* Place the loop exit label here.  */
+  gimple_seq_add_stmt (stmt_seqp, gimple_build_label (loop_exit));
+}
+
 /* Helper function to initialize local data for the reduction arrays.
    The reduction arrays need to be placed inside the calling function
    for accelerators, or else the host won't be able to preform the final
@@ -12261,12 +13132,18 @@  oacc_initialize_reduction_data (tree cla
       gimple_call_set_lhs (stmt, array);
       gimple_seq_add_stmt (stmt_seqp, stmt);
 
+      /* Initialize array. */
+      tree init = omp_reduction_init_op (OMP_CLAUSE_LOCATION (c),
+					 OMP_CLAUSE_REDUCTION_CODE (c),
+					 type);
+      oacc_init_reduction_array (array, init, nthreads, stmt_seqp);
+
       /* Map this array into the accelerator.  */
 
       /* Add the reduction array to the list of clauses.  */
       tree x = array;
       t = build_omp_clause (gimple_location (ctx->stmt), OMP_CLAUSE_MAP);
-      OMP_CLAUSE_SET_MAP_KIND (t, GOMP_MAP_FORCE_FROM);
+      OMP_CLAUSE_SET_MAP_KIND (t, GOMP_MAP_FORCE_TOFROM);
       OMP_CLAUSE_DECL (t) = x;
       OMP_CLAUSE_CHAIN (t) = NULL;
       if (oc)