diff mbox

[gomp4] Worker reduction builtin

Message ID 55C3F04C.1060604@codesourcery.com
State New
Headers show

Commit Message

Cesar Philippidis Aug. 6, 2015, 11:39 p.m. UTC
On 08/04/2015 04:50 AM, Nathan Sidwell wrote:

> +/* Worker reduction address expander.  */
> +static rtx
> +nvptx_expand_work_red_addr (tree exp, rtx target,
> +			    machine_mode ARG_UNUSED (mode),
> +			    int ignore)
>  {
> -  return nvptx_expand_lock_unlock (desc, exp, false);
> +  if (ignore)
> +    return target;
> +  
> +  rtx loop_id = expand_expr (CALL_EXPR_ARG (exp, 0),
> +			     NULL_RTX, mode, EXPAND_NORMAL);
> +  rtx red_id = expand_expr (CALL_EXPR_ARG (exp, 1),
> +			     NULL_RTX, mode, EXPAND_NORMAL);
> +  gcc_assert (GET_CODE (loop_id) == CONST_INT
> +	      && GET_CODE (red_id) == CONST_INT);
> +  gcc_assert (REG_P (target));
> +
> +  unsigned lid = (unsigned)UINTVAL (loop_id);
> +  unsigned rid = (unsigned)UINTVAL (red_id);
> +
> +  unsigned ix;
> +
> +  for (ix = 0; ix != loop_reds.length (); ix++)
> +    if (loop_reds[ix].id == lid)
> +      goto found_lid;
> +  /* Allocate a new loop.  */
> +  loop_reds.safe_push (loop_red (lid));
> + found_lid:
> +  loop_red &loop = loop_reds[ix];
> +  for (ix = 0; ix != loop.vars.length (); ix++)
> +    if (loop.vars[ix].first == rid)
> +      goto found_rid;
> +
> +  /* Allocate a new var. */
> +  {
> +    tree type = TREE_TYPE (TREE_TYPE (exp));
> +    enum machine_mode mode = TYPE_MODE (type);
> +    unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
> +    unsigned off = loop.hwm;
> +
> +    if (align > worker_red_align)
> +      worker_red_align = align;
> +    off = (off + align - 1) & ~(align -1);
> +    loop.hwm = off + GET_MODE_SIZE (mode);
> +    loop.vars.safe_push (var_red_t (rid, off));
> +  }
> + found_rid:
> +
> +  /* Return offset into worker reduction array.  */
> +  unsigned offset = loop.vars[ix].second;
> +  
> +  rtx addr = gen_reg_rtx (Pmode);
> +  emit_move_insn (addr,
> +		  gen_rtx_PLUS (Pmode, worker_red_sym, GEN_INT (offset)));
> +  emit_insn (gen_rtx_SET (target,
> +			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
> +					  UNSPEC_FROM_SHARED)));
> +  return target;
>  }

Something is wrong over here. I'm seeing this ICE:

wred.c: In function ‘main._omp_fn.0’:
wred.c:9:9: error: unrecognizable insn:
 #pragma acc parallel loop vector_length (32) num_workers (32) worker
reduction (+:red) copy (red)
         ^
(insn 28 27 29 2 (set (reg:DI 59)
        (plus:DI (symbol_ref:DI ("__worker_red"))
            (const_int 0 [0]))) wred.c:9 -1
     (nil))

The attached patch fixes it by assigning worker_red_sym to a scratch
register. Is this OK gomp-4_0-branch?

Cesar

Comments

Nathan Sidwell Aug. 7, 2015, 6:56 a.m. UTC | #1
On 08/07/15 01:39, Cesar Philippidis wrote:

> The attached patch fixes it by assigning worker_red_sym to a scratch
> register. Is this OK gomp-4_0-branch?

It'd be simpler to not create the PLUS rtx if offset == 0
diff mbox

Patch

2015-08-06  Cesar Philippidis  <cesar@codesourcery.com>

        gcc/
	* config/nvptx/nvptx.c (nvptx_expand_work_red_addr): Use a
	scratch register for worker_red_sym.
	

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index e343e53..389e370 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -3415,10 +3415,12 @@  nvptx_expand_work_red_addr (tree exp, rtx target,
 
   /* Return offset into worker reduction array.  */
   unsigned offset = loop.vars[ix].second;
-  
+
+  rtx base = gen_reg_rtx (Pmode);
   rtx addr = gen_reg_rtx (Pmode);
+  emit_insn (gen_rtx_SET (base, worker_red_sym));
   emit_move_insn (addr,
-		  gen_rtx_PLUS (Pmode, worker_red_sym, GEN_INT (offset)));
+		  gen_rtx_PLUS (Pmode, base, GEN_INT (offset)));
   emit_insn (gen_rtx_SET (target,
 			  gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
 					  UNSPEC_FROM_SHARED)));