2015-09-21 Nathan Sidwell <nathan@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
* omp-low.h (get_oacc_fn_attrib): Declare.
* omp-low.c (OACC_FN_ATTRIB): Define.
(get_oacc_fn_attrib): New.
(oacc_xform_on_device): New.
(execute_oacc_transform): New pass.
(pass_data_oacc_transform): New.
(pass_oacc_transform): New.
(make_pass_oacc_transform): New.
* tree-pass.h (make_pass_oacc_transform): Declare.
* passes.def: Add pass_oacc_transform.
===================================================================
@@ -8860,6 +8860,16 @@ expand_omp_atomic (struct omp_region *re
expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}
+/* Name under which the oacc function attribute is looked up.  */
+#define OACC_FN_ATTRIB "oacc function"
+
+/* Look up the oacc function attribute on FN and return the result of
+   that lookup.  A function that is not an oacc function yields
+   NULL.  */
+
+tree
+get_oacc_fn_attrib (tree fn)
+{
+  tree fn_attrs = DECL_ATTRIBUTES (fn);
+
+  return lookup_attribute (OACC_FN_ATTRIB, fn_attrs);
+}
/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
@@ -13909,4 +13919,131 @@ omp_finish_file (void)
}
}
+/* Replace CALL, a call to acc_on_device, by an assignment of the
+   folded answer to the call's lhs.  OpenACC 2.0a requires folding at
+   compile time for constant operands; here the call is always
+   folded.  In an offloaded function we're never 'none'.
+
+   The host compiler produces ARG == GOMP_DEVICE_HOST.  The
+   accelerator compiler (ACCEL_COMPILER defined) produces
+   ARG == GOMP_DEVICE_NOT_HOST || ARG == ACCEL_COMPILER_acc_device.
+   Either way the boolean is converted to integer_type_node.  */
+
+static void
+oacc_xform_on_device (gimple *call)
+{
+  tree dev_arg = gimple_call_arg (call, 0);
+
+  /* Device kind that every compilation of this flavour answers to.  */
+#ifdef ACCEL_COMPILER
+  unsigned base_dev = GOMP_DEVICE_NOT_HOST;
+#else
+  unsigned base_dev = GOMP_DEVICE_HOST;
+#endif
+  tree base_cst = build_int_cst (integer_type_node, base_dev);
+  tree folded = build2 (EQ_EXPR, boolean_type_node, dev_arg, base_cst);
+
+#ifdef ACCEL_COMPILER
+  /* An accelerator compiler additionally matches its own device.  */
+  tree accel_cst = build_int_cst (integer_type_node,
+                                  ACCEL_COMPILER_acc_device);
+  tree accel_eq = build2 (EQ_EXPR, boolean_type_node, dev_arg, accel_cst);
+  folded = build2 (TRUTH_OR_EXPR, boolean_type_node, folded, accel_eq);
+#endif
+  folded = fold_convert (integer_type_node, folded);
+
+  /* Gimplify "lhs = folded" into a fresh statement sequence.  */
+  tree dest = gimple_call_lhs (call);
+  gimple_seq new_stmts = NULL;
+
+  push_gimplify_context (true);
+  gimplify_assign (dest, folded, &new_stmts);
+  pop_gimplify_context (NULL);
+
+  /* Swap the call for the sequence just built.  */
+  gimple_stmt_iterator call_gsi = gsi_for_stmt (call);
+  gsi_replace_with_seq (&call_gsi, new_stmts, false);
+}
+
+/* Main entry point for oacc transformations which run on the device
+   compiler after LTO, so we know what the target device is at this
+   point (including the host fallback).
+
+   Does nothing unless the current function carries the oacc function
+   attribute.  Otherwise walks every statement of every basic block
+   and folds each call to BUILT_IN_ACC_ON_DEVICE.  Always returns 0.  */
+
+static unsigned int
+execute_oacc_transform ()
+{
+  tree attrs = get_oacc_fn_attrib (current_function_decl);
+
+  if (!attrs)
+    /* Not an offloaded function.  */
+    return 0;
+
+  basic_block bb;
+  FOR_ALL_BB_FN (bb, cfun)
+    /* No increment in the loop header: every path through the body
+       advances the iterator explicitly.  Combining a header increment
+       with the rewind below skipped the statement following a
+       non-transformed call at the start of a block.  */
+    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
+      {
+        gimple *stmt = gsi_stmt (gsi);
+
+        if (!is_gimple_call (stmt))
+          {
+            /* The transform only works on calls.  */
+            gsi_next (&gsi);
+            continue;
+          }
+
+        /* Rewind to allow rescan: park the iterator on the statement
+           before the call, which survives replacement of the call.  */
+        gsi_prev (&gsi);
+
+        bool rescan = false;
+        gcall *call = as_a <gcall *> (stmt);
+
+        if (gimple_call_builtin_p (call, BUILT_IN_ACC_ON_DEVICE))
+          /* acc_on_device must be evaluated at compile time for
+             constant arguments.  */
+          {
+            oacc_xform_on_device (call);
+            rescan = true;
+          }
+
+        if (gsi_end_p (gsi))
+          /* We rewound past the beginning of the BB, so the call (or
+             its replacement) is the first statement.  */
+          gsi = gsi_start_bb (bb);
+        else
+          /* Undo the rewind: back onto the call, or onto the first
+             statement that replaced it.  */
+          gsi_next (&gsi);
+
+        if (!rescan)
+          /* The call was left alone; step over it so we don't get
+             stuck infinitely.  */
+          gsi_next (&gsi);
+      }
+
+  return 0;
+}
+
+namespace {
+
+/* Static descriptor for the oacc transform pass.  It requires
+   PROP_cfg and requests TODO_update_ssa and TODO_cleanup_cfg when the
+   pass finishes.  */
+
+const pass_data pass_data_oacc_transform =
+{
+  GIMPLE_PASS, /* type */
+  "fold_oacc_transform", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
+};
+
+/* GIMPLE pass object that runs execute_oacc_transform.  */
+
+class pass_oacc_transform : public gimple_opt_pass
+{
+public:
+  pass_oacc_transform (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_oacc_transform, ctxt)
+  {}
+
+  /* opt_pass methods: */
+
+  /* The gating test lives inside execute: the pass body is skipped
+     when flag_openacc is zero or when seen_error () reports an
+     error.  */
+  virtual unsigned int execute (function *)
+  {
+    if (flag_openacc == 0 || seen_error ())
+      return 0;
+
+    return execute_oacc_transform ();
+  }
+
+}; // class pass_oacc_transform
+
+} // anon namespace
+
+/* Allocate and return a new pass_oacc_transform bound to CTXT.  */
+
+gimple_opt_pass *
+make_pass_oacc_transform (gcc::context *ctxt)
+{
+  gimple_opt_pass *pass = new pass_oacc_transform (ctxt);
+
+  return pass;
+}
+
#include "gt-omp-low.h"
===================================================================
@@ -28,6 +28,7 @@ extern void free_omp_regions (void);
extern tree omp_reduction_init (tree, tree);
extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
extern void omp_finish_file (void);
+extern tree get_oacc_fn_attrib (tree);
extern GTY(()) vec<tree, va_gc> *offload_funcs;
extern GTY(()) vec<tree, va_gc> *offload_vars;
===================================================================
@@ -406,6 +406,7 @@ extern gimple_opt_pass *make_pass_lower_
extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_oacc_transform (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt);
===================================================================
@@ -148,6 +148,7 @@ along with GCC; see the file COPYING3.
INSERT_PASSES_AFTER (all_passes)
NEXT_PASS (pass_fixup_cfg);
NEXT_PASS (pass_lower_eh_dispatch);
+ NEXT_PASS (pass_oacc_transform);
NEXT_PASS (pass_all_optimizations);
PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations)
NEXT_PASS (pass_remove_cgraph_callee_edges);