diff mbox

New post-LTO OpenACC pass

Message ID 560030A9.5020705@acm.org
State New
Headers show

Commit Message

Nathan Sidwell Sept. 21, 2015, 4:30 p.m. UTC
Jakub,
this patch adds a new transforming pass, which executes after the LTO readback 
pass, and hence knows whether it is targeting the host or (a) device.

The contents of the pass will be built out -- it does much more on the gomp4 
branch.  This instance simply scans for and replaces the acc_on_device builtin. 
Expanding early allows such things as constant propagation and dead code 
removal to happen earlier.

We still need the traditional expansion at RTL time too, because this function 
is used when building the library, in case the user does something crazy, like 
calling via a pointer.

The scanning code is written such that the replacement code is also scanned.  This 
will matter for the later transforms, which might expand to internal builtins that 
themselves could be optimized.

The 'get_oacc_fn_attrib' function was also present in the launch API patch.  Although 
used only within omp-low.c in this patch, it ends up being more widely used.

ok for trunk?

nathan
diff mbox

Patch

2015-09-21  Nathan Sidwell  <nathan@codesourcery.com>
	    Cesar Philippidis  <cesar@codesourcery.com>

	* omp-low.h (get_oacc_fn_attrib): Declare.
	* omp-low.c (get_oacc_fn_attrib): New.
	(oacc_xform_on_device): New.
	(execute_oacc_transform): New pass.
	(pass_data_oacc_transform): New.
	(pass_oacc_transform): New.
	(make_pass_oacc_transform): New.
	* tree-pass.h (make_pass_oacc_transform): Declare.
	* passes.def: Add pass_oacc_transform.

Index: omp-low.c
===================================================================
--- omp-low.c	(revision 227968)
+++ omp-low.c	(working copy)
@@ -8860,6 +8860,16 @@  expand_omp_atomic (struct omp_region *re
   expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
 }
 
+#define OACC_FN_ATTRIB "oacc function"
+
+/* Return FN's OpenACC function attribute, or NULL if FN has none.  */
+
+tree
+get_oacc_fn_attrib (tree fn)
+{
+  tree attribs = DECL_ATTRIBUTES (fn);
+  return lookup_attribute (OACC_FN_ATTRIB, attribs);
+}
 
 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
 
@@ -13909,4 +13919,131 @@  omp_finish_file (void)
     }
 }
 
+/* Replace CALL, a call to acc_on_device, with an assignment of the
+   folded comparison to the call's LHS.  OpenACC 2.0a requires this for
+   constant operands; we always fold.  Offloaded code is never 'none'.  */
+
+static void
+oacc_xform_on_device (gimple *call)
+{
+  tree arg = gimple_call_arg (call, 0);
+
+#ifdef ACCEL_COMPILER
+  unsigned val = GOMP_DEVICE_NOT_HOST;
+#else
+  unsigned val = GOMP_DEVICE_HOST;
+#endif
+  tree val_cst = build_int_cst (integer_type_node, val);
+  tree result = build2 (EQ_EXPR, boolean_type_node, arg, val_cst);
+
+#ifdef ACCEL_COMPILER
+  /* On an accelerator the device's own type matches as well.  */
+  tree dev_cst = build_int_cst (integer_type_node, ACCEL_COMPILER_acc_device);
+  tree dev = build2 (EQ_EXPR, boolean_type_node, arg, dev_cst);
+  result = build2 (TRUTH_OR_EXPR, boolean_type_node, result, dev);
+#endif
+  result = fold_convert (integer_type_node, result);
+
+  /* Gimplify "LHS = RESULT" into a fresh sequence.  */
+  gimple_seq seq = NULL;
+  push_gimplify_context (true);
+  gimplify_assign (gimple_call_lhs (call), result, &seq);
+  pop_gimplify_context (NULL);
+
+  gimple_stmt_iterator gsi = gsi_for_stmt (call);
+  gsi_replace_with_seq (&gsi, seq, false);
+}
+
+/* Main entry point for oacc transformations.  Runs after LTO, when the
+   target (device or host fallback) is known, and replaces each
+   acc_on_device call in an offloaded function, rescanning the result.  */
+
+static unsigned int
+execute_oacc_transform ()
+{
+  tree attrs = get_oacc_fn_attrib (current_function_decl);
+
+  if (!attrs)
+    /* Not an offloaded function.  */
+    return 0;
+
+  basic_block bb;
+  FOR_ALL_BB_FN (bb, cfun)
+    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
+      {
+	gimple *stmt = gsi_stmt (gsi);
+	bool rescan = false;
+
+	if (is_gimple_call (stmt))
+	  {
+	    /* Rewind so that a replacement sequence can be rescanned.  */
+	    gsi_prev (&gsi);
+
+	    gcall *call = as_a <gcall *> (stmt);
+	    if (gimple_call_builtin_p (call, BUILT_IN_ACC_ON_DEVICE))
+	      {
+		/* acc_on_device must be folded at compile time.  */
+		oacc_xform_on_device (call);
+		rescan = true;
+	      }
+
+	    if (gsi_end_p (gsi))
+	      /* We rewound past the beginning of the BB.  */
+	      gsi = gsi_start_bb (bb);
+	    else
+	      /* Undo the rewind.  */
+	      gsi_next (&gsi);
+	  }
+
+	if (!rescan)
+	  /* Nothing was replaced, so step over STMT.  */
+	  gsi_next (&gsi);
+      }
+
+  return 0;
+}
+
+namespace {
+
+const pass_data pass_data_oacc_transform =
+{
+  GIMPLE_PASS, /* type */
+  "fold_oacc_transform", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
+};
+
+class pass_oacc_transform : public gimple_opt_pass
+{
+public:
+  pass_oacc_transform (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_oacc_transform, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual unsigned int execute (function *)
+    {
+      /* Do nothing unless OpenACC is enabled and no error was seen.  */
+      if (flag_openacc == 0 || seen_error ())
+	return 0;
+
+      return execute_oacc_transform ();
+    }
+}; // class pass_oacc_transform
+
+} // anon namespace
+
+/* Create a new instance of the OpenACC transform pass for CTXT.  */
+
+gimple_opt_pass *
+make_pass_oacc_transform (gcc::context *ctxt)
+{
+  return new pass_oacc_transform (ctxt);
+}
+
 #include "gt-omp-low.h"
Index: omp-low.h
===================================================================
--- omp-low.h	(revision 227968)
+++ omp-low.h	(working copy)
@@ -28,6 +28,7 @@  extern void free_omp_regions (void);
 extern tree omp_reduction_init (tree, tree);
 extern bool make_gimple_omp_edges (basic_block, struct omp_region **, int *);
 extern void omp_finish_file (void);
+extern tree get_oacc_fn_attrib (tree);
 
 extern GTY(()) vec<tree, va_gc> *offload_funcs;
 extern GTY(()) vec<tree, va_gc> *offload_vars;
Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 227968)
+++ tree-pass.h	(working copy)
@@ -406,6 +406,7 @@  extern gimple_opt_pass *make_pass_lower_
 extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_oacc_transform (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt);
Index: passes.def
===================================================================
--- passes.def	(revision 227968)
+++ passes.def	(working copy)
@@ -148,6 +148,7 @@  along with GCC; see the file COPYING3.
   INSERT_PASSES_AFTER (all_passes)
   NEXT_PASS (pass_fixup_cfg);
   NEXT_PASS (pass_lower_eh_dispatch);
+  NEXT_PASS (pass_oacc_transform);
   NEXT_PASS (pass_all_optimizations);
   PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations)
       NEXT_PASS (pass_remove_cgraph_callee_edges);