diff mbox

[10/16] Add pass_oacc_kernels pass group in passes.def

Message ID 564DA4CA.3020506@mentor.com
State New
Headers show

Commit Message

Tom de Vries Nov. 19, 2015, 10:30 a.m. UTC
On 16/11/15 13:45, Richard Biener wrote:
>> I've eliminated all the uses for pass_tree_loop_init/pass_tree_loop_done in
>> >the pass group. Instead, I've added conditional loop optimizer setup in:
>> >-  pass_lim and pass_scev_cprop (added in this patch), and

Reposting the "Add pass_oacc_kernels pass group in passes.def" patch.

pass_scev_cprop is no longer part of the pass group.

And I've dropped the scev_initialize in pass_lim.

Pass_lim is part of the pass_tree_loop pass group, where AFAIU scev info 
is initialized at the start of the pass group and updated or reset by 
passes in the pass group if necessary, such that it's always available, 
or can be recalculated on the spot.

First, pass_lim doesn't invalidate scev info. And second, AFAIU pass_lim 
doesn't use scev info. So there doesn't seem to be a need to do anything 
about scev info for using pass_lim outside pass_tree_loop.

>> >- pass_parallelize_loops_oacc_kernels (added in patch "Add
>> >   pass_parallelize_loops_oacc_kernels").
> You miss calling scev_finalize ().

I've added the scev_finalize () in patch "Add 
pass_parallelize_loops_oacc_kernels".

Thanks,
- Tom
diff mbox

Patch

Add pass_oacc_kernels pass group in passes.def

2015-11-09  Tom de Vries  <tom@codesourcery.com>

	* omp-low.c (pass_expand_omp_ssa::clone): New function.
	* passes.def: Add pass_oacc_kernels pass group.
	* tree-ssa-loop-ch.c (pass_ch::clone): New function.
	* tree-ssa-loop-im.c (tree_ssa_lim): Make static.
	(pass_lim::execute): Allow to run outside pass_tree_loop.

---
 gcc/omp-low.c          |  1 +
 gcc/passes.def         | 25 +++++++++++++++++++++++++
 gcc/tree-ssa-loop-ch.c |  2 ++
 gcc/tree-ssa-loop-im.c | 10 +++++++++-
 4 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 9c27396..d2f88b3 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -13385,6 +13385,7 @@  public:
       return !(fun->curr_properties & PROP_gimple_eomp);
     }
   virtual unsigned int execute (function *) { return execute_expand_omp (); }
+  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
 
 }; // class pass_expand_omp_ssa
 
diff --git a/gcc/passes.def b/gcc/passes.def
index 17027786..00446c3 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -88,7 +88,32 @@  along with GCC; see the file COPYING3.  If not see
 	  /* pass_build_ealias is a dummy pass that ensures that we
 	     execute TODO_rebuild_alias at this point.  */
 	  NEXT_PASS (pass_build_ealias);
+	  /* Pass group that runs when the function is an offloaded function
+	     containing oacc kernels loops.  Part 1.  */
+	  NEXT_PASS (pass_oacc_kernels);
+	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
+	      /* We need pass_ch here, because pass_lim has no effect on
+	         exit-first loops (PR65442).  Ideally we want to remove both
+		 this pass instantiation, and the reverse transformation
+		 transform_to_exit_first_loop_alt, which is done in
+		 pass_parallelize_loops_oacc_kernels. */
+	      NEXT_PASS (pass_ch);
+	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_fre);
+	  /* Pass group that runs when the function is an offloaded function
+	     containing oacc kernels loops.  Part 2.  */
+	  NEXT_PASS (pass_oacc_kernels2);
+	  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels2)
+	      /* We use pass_lim to rewrite in-memory iteration and reduction
+	         variable accesses in loops into local variables accesses.  */
+	      NEXT_PASS (pass_lim);
+	      NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */);
+	      NEXT_PASS (pass_lim);
+	      NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */);
+	      NEXT_PASS (pass_dce);
+	      NEXT_PASS (pass_parallelize_loops_oacc_kernels);
+	      NEXT_PASS (pass_expand_omp_ssa);
+	  POP_INSERT_PASSES ()
 	  NEXT_PASS (pass_merge_phi);
           NEXT_PASS (pass_dse);
 	  NEXT_PASS (pass_cd_dce);
diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c
index 7e618bf..6493fcc 100644
--- a/gcc/tree-ssa-loop-ch.c
+++ b/gcc/tree-ssa-loop-ch.c
@@ -165,6 +165,8 @@  public:
   /* Initialize and finalize loop structures, copying headers inbetween.  */
   virtual unsigned int execute (function *);
 
+  opt_pass * clone () { return new pass_ch (m_ctxt); }
+
 protected:
   /* ch_base method: */
   virtual bool process_loop_p (struct loop *loop);
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 30b53ce..96f05f2 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -2496,7 +2496,7 @@  tree_ssa_lim_finalize (void)
 /* Moves invariants from loops.  Only "expensive" invariants are moved out --
    i.e. those that are likely to be win regardless of the register pressure.  */
 
-unsigned int
+static unsigned int
 tree_ssa_lim (void)
 {
   unsigned int todo;
@@ -2560,9 +2560,17 @@  public:
 unsigned int
 pass_lim::execute (function *fun)
 {
+  if (!loops_state_satisfies_p (LOOPS_NORMAL
+				| LOOPS_HAVE_RECORDED_EXITS))
+    loop_optimizer_init (LOOPS_NORMAL
+			 | LOOPS_HAVE_RECORDED_EXITS);
+
   if (number_of_loops (fun) <= 1)
     return 0;
 
+  if (!loops_state_satisfies_p (LOOP_CLOSED_SSA))
+    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+
   return tree_ssa_lim ();
 }