diff mbox

OpenACC middle end changes

Message ID 87tx0ts0bb.fsf@schwinge.name
State New
Headers show

Commit Message

Thomas Schwinge Dec. 18, 2014, 10:46 a.m. UTC
Hi!

On Thu, 13 Nov 2014 19:09:49 +0100, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Nov 13, 2014 at 05:59:11PM +0100, Thomas Schwinge wrote:
> > --- gcc/builtins.c
> > +++ gcc/builtins.c

> > +/* Expand OpenACC acc_on_device.
> > +
> > +   This has to happen late (that is, not in early folding; expand_builtin_*,
> > +   rather than fold_builtin_*), as we have to act differently for host and
> > +   acceleration device (ACCEL_COMPILER conditional).  */
> > +
> > +static rtx
> > +expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED)
> > +{
> > +  if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
> > +    return NULL_RTX;
> > +
> > +  tree arg, v1, v2, ret;
> > +  location_t loc;
> > +
> > +  arg = CALL_EXPR_ARG (exp, 0);
> > +  arg = builtin_save_expr (arg);
> > +  loc = EXPR_LOCATION (exp);
> > +
> > +  /* Build: (arg == v1 || arg == v2) ? 1 : 0.  */
> > +
> > +#ifdef ACCEL_COMPILER
> > +  v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_not_host */ 3);
> > +  v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device);
> > +#else
> > +  v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_none */ 0);
> > +  v2 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_host */ 2);
> > +#endif
> > +
> > +  v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1);
> > +  v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2);
> > +
> > +  /* Can't use TRUTH_ORIF_EXPR, as that is not supported by
> > +     expand_expr_real*.  */
> > +  ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2);
> > +  ret = fold_build3_loc (loc, COND_EXPR, integer_type_node,
> > +			 ret, integer_one_node, integer_zero_node);
> > +
> > +  return expand_normal (ret);
> 
> If you can't fold it late (which is indeed a problem for -O0),
> then I'd suggest to implement this more RTL-ish.
> So, avoid the builtin_save_expr, instead
>   rtx op = expand_normal (arg);
> Don't build v1/v2 as trees (and, please fix the TODOs), but rtxes,

(acc_device_* TODOs already resolved earlier on.)

> just
>   rtx v1 = GEN_INT (...);
>   rtx v2 = GEN_INT (...);
>   machine_mode mode = TYPE_MODE (TREE_TYPE (arg));
>   rtx ret = gen_reg_rtx (TYPE_MODE (integer_type_node));
>   emit_move_insn (ret, const0_rtx);
>   rtx_code_label *done_label = gen_label_rtx ();
>   emit_cmp_and_jump_insns (op, v1, NE, NULL_RTX, mode,
> 			   false, done_label, PROB_EVEN);
>   emit_cmp_and_jump_insns (op, v2, NE, NULL_RTX, mode,
> 			   false, done_label, PROB_EVEN);
>   emit_move_insn (ret, const1_rtx);
>   emit_label (done_label);
>   return ret;
> or similar.

Thanks for the review/suggestion/code!

> Note, it would still be worthwhile to fold the builtin, at least
> when optimizing, after IPA.  Dunno if we have some property you can check,
> and Richard B. could suggest where it would be most appropriate (if GIMPLE
> guarded match.pd entry, or what), gimple_fold, etc.

I'll make a note to have a look at that later on.

> I bet I should handle omp_is_initial_device (); similarly.

Yeah.

Committed to gomp-4_0-branch in r218858:

commit da5ad5aec1c0f9b230ecb2dc00620a5598de5066
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Thu Dec 18 10:42:30 2014 +0000

    OpenACC acc_on_device: Make builtin expansion more RTXy.
    
    	gcc/
    	* builtins.c (expand_builtin_acc_on_device): Make more RTXy.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@218858 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp |  5 +++++
 gcc/builtins.c     | 44 +++++++++++++++++++++-----------------------
 2 files changed, 26 insertions(+), 23 deletions(-)



Grüße,
 Thomas

Comments

Jakub Jelinek Dec. 18, 2014, 11:33 a.m. UTC | #1
On Thu, Dec 18, 2014 at 11:46:00AM +0100, Thomas Schwinge wrote:
> > just
> >   rtx v1 = GEN_INT (...);
> >   rtx v2 = GEN_INT (...);
> >   machine_mode mode = TYPE_MODE (TREE_TYPE (arg));
> >   rtx ret = gen_reg_rtx (TYPE_MODE (integer_type_node));
> >   emit_move_insn (ret, const0_rtx);
> >   rtx_code_label *done_label = gen_label_rtx ();
> >   emit_cmp_and_jump_insns (op, v1, NE, NULL_RTX, mode,
> > 			   false, done_label, PROB_EVEN);
> >   emit_cmp_and_jump_insns (op, v2, NE, NULL_RTX, mode,
> > 			   false, done_label, PROB_EVEN);
> >   emit_move_insn (ret, const1_rtx);
> >   emit_label (done_label);
> >   return ret;
> > or similar.
> 
> Thanks for the review/suggestion/code!

Note, as I found later, emit_cmp_and_jump_insns is good enough only
for certain modes on certain architectures (in particular, for
cases where can_compare_p returns true).
So it is better to use do_compare_rtx_and_jump instead of
emit_cmp_and_jump_insns, because it handles also the cases which
emit_cmp_and_jump_insns silently mishandles.  You'll need to reorder
the arguments a little bit and add one NULL_RTX argument.
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63848#c4

	Jakub
diff mbox

Patch

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index b370616..a3650c5 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,8 @@ 
+2014-12-18  Thomas Schwinge  <thomas@codesourcery.com>
+	    Jakub Jelinek  <jakub@redhat.com>
+
+	* builtins.c (expand_builtin_acc_on_device): Make more RTXy.
+
 2014-12-17  Thomas Schwinge  <thomas@codesourcery.com>
 	    Bernd Schmidt  <bernds@codesourcery.com>
 
diff --git gcc/builtins.c gcc/builtins.c
index fcf3f53..e946521 100644
--- gcc/builtins.c
+++ gcc/builtins.c
@@ -5889,38 +5889,36 @@  expand_stack_save (void)
    acceleration device (ACCEL_COMPILER conditional).  */
 
 static rtx
-expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED)
+expand_builtin_acc_on_device (tree exp, rtx target)
 {
   if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
     return NULL_RTX;
 
-  tree arg, v1, v2, ret;
-  location_t loc;
-
-  arg = CALL_EXPR_ARG (exp, 0);
-  arg = builtin_save_expr (arg);
-  loc = EXPR_LOCATION (exp);
-
-  /* Build: (arg == v1 || arg == v2) ? 1 : 0.  */
+  tree arg = CALL_EXPR_ARG (exp, 0);
 
+  /* Return (arg == v1 || arg == v2) ? 1 : 0.  */
+  machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
+  rtx v = expand_normal (arg), v1, v2;
 #ifdef ACCEL_COMPILER
-  v1 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_NOT_HOST);
-  v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device);
+  v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
+  v2 = GEN_INT (ACCEL_COMPILER_acc_device);
 #else
-  v1 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_NONE);
-  v2 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_HOST);
+  v1 = GEN_INT (GOMP_DEVICE_NONE);
+  v2 = GEN_INT (GOMP_DEVICE_HOST);
 #endif
+  machine_mode target_mode = TYPE_MODE (integer_type_node);
+  if (!REG_P (target) || GET_MODE (target) != target_mode)
+    target = gen_reg_rtx (target_mode);
+  emit_move_insn (target, const0_rtx);
+  rtx_code_label *done_label = gen_label_rtx ();
+  emit_cmp_and_jump_insns (v, v1, NE, NULL_RTX, v_mode,
+			   false, done_label, PROB_EVEN);
+  emit_cmp_and_jump_insns (v, v2, NE, NULL_RTX, v_mode,
+			   false, done_label, PROB_EVEN);
+  emit_move_insn (target, const1_rtx);
+  emit_label (done_label);
 
-  v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1);
-  v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2);
-
-  /* Can't use TRUTH_ORIF_EXPR, as that is not supported by
-     expand_expr_real*.  */
-  ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2);
-  ret = fold_build3_loc (loc, COND_EXPR, integer_type_node,
-			 ret, integer_one_node, integer_zero_node);
-
-  return expand_normal (ret);
+  return target;
 }