diff mbox

Add -ftree-loop-distribute-patterns enabled at -O3.

Message ID 1280762714-27239-1-git-send-email-sebpop@gmail.com
State New
Headers show

Commit Message

Sebastian Pop Aug. 2, 2010, 3:25 p.m. UTC
Hi,

Here is the patch that I am testing on amd64-linux.  I will commit
this patch to trunk after regstrap.

Sebastian

---
 gcc/common.opt               |    4 +++
 gcc/doc/invoke.texi          |   25 ++++++++++++++++++++++-
 gcc/opts.c                   |    1 +
 gcc/tree-data-ref.c          |   26 ++++++++++++++++++++++++
 gcc/tree-data-ref.h          |    1 +
 gcc/tree-loop-distribution.c |   45 +++++++++++++++++++++++++++++------------
 6 files changed, 88 insertions(+), 14 deletions(-)

Comments

Gerald Pfeifer Aug. 7, 2010, 5:49 p.m. UTC | #1
On Mon, 2 Aug 2010, Sebastian Pop wrote:
> Here is the patch that I am testing on amd64-linux.  I will commit
> this patch to trunk after regstrap.

I think this would be worthwhile to add to 
http://gcc.gnu.org/gcc-4.6/changes.html ?

Gerald
H.J. Lu Oct. 20, 2010, 11:06 p.m. UTC | #2
On Mon, Aug 2, 2010 at 8:25 AM, Sebastian Pop <sebpop@gmail.com> wrote:
> Hi,
>
> Here is the patch that I am testing on amd64-linux.  I will commit
> this patch to trunk after regstrap.
>
> Sebastian
>

This new option caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46107
H.J. Lu Dec. 18, 2010, 2:27 p.m. UTC | #3
On Wed, Oct 20, 2010 at 4:06 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Mon, Aug 2, 2010 at 8:25 AM, Sebastian Pop <sebpop@gmail.com> wrote:
>> Hi,
>>
>> Here is the patch that I am testing on amd64-linux.  I will commit
>> this patch to trunk after regstrap.
>>
>> Sebastian
>>
>
> This new option caused:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46107
>

This also caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47002
diff mbox

Patch

diff --git a/gcc/common.opt b/gcc/common.opt
index 41a9838..8cb09ab 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1337,6 +1337,10 @@  ftree-loop-distribution
 Common Report Var(flag_tree_loop_distribution) Optimization
 Enable loop distribution on trees
 
+ftree-loop-distribute-patterns
+Common Report Var(flag_tree_loop_distribute_patterns) Optimization
+Enable loop distribution for patterns transformed into a library call
+
 ftree-loop-im
 Common Report Var(flag_tree_loop_im) Init(1) Optimization
 Enable loop invariant motion on trees
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 73051de..68b64db 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -384,7 +384,7 @@  Objective-C and Objective-C++ Dialects}.
 -ftree-builtin-call-dce -ftree-ccp -ftree-ch -ftree-copy-prop @gol
 -ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse @gol
 -ftree-forwprop -ftree-fre -ftree-loop-if-convert -ftree-loop-im @gol
--ftree-phiprop -ftree-loop-distribution @gol
+-ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol
 -ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
 -ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol
 -ftree-sink -ftree-sra -ftree-switch-conversion @gol
@@ -6925,6 +6925,29 @@  DO I = 1, N
 ENDDO
 @end smallexample
 
+@item -ftree-loop-distribute-patterns
+Perform loop distribution of patterns that can be code generated with
+calls to a library.  This flag is enabled by default at @option{-O3}.
+
+This pass distributes the initialization loops and replaces them with
+calls to @code{memset} that zero the memory.  For example, the loop
+@smallexample
+DO I = 1, N
+  A(I) = 0
+  B(I) = A(I) + I
+ENDDO
+@end smallexample
+is transformed to
+@smallexample
+DO I = 1, N
+   A(I) = 0
+ENDDO
+DO I = 1, N
+   B(I) = A(I) + I
+ENDDO
+@end smallexample
+and the initialization loop is transformed into a zeroing @code{memset} call.
+
 @item -ftree-loop-im
 @opindex ftree-loop-im
 Perform loop invariant motion on trees.  This pass moves only invariants that
diff --git a/gcc/opts.c b/gcc/opts.c
index 07d7a23..2579e9f 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -862,6 +862,7 @@  decode_options (unsigned int argc, const char **argv,
 
   /* -O3 optimizations.  */
   opt3 = (optimize >= 3);
+  flag_tree_loop_distribute_patterns = opt3;
   flag_predictive_commoning = opt3;
   flag_inline_functions = opt3;
   flag_unswitch_loops = opt3;
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index e7aa277..2656350 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -5038,6 +5038,32 @@  stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
   free (bbs);
 }
 
+/* Initialize STMTS with all the statements of LOOP that store a zero
+   constant, integer or real, to memory, as in "A[i] = 0".
+   NOTE(review): real_zerop may also accept -0.0, whose bytes are not
+   all zero; confirm before such stores are turned into memset.  */
+
+void
+stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
+{
+  unsigned int i;
+  basic_block bb;
+  gimple_stmt_iterator si;
+  gimple stmt;
+  tree op;
+  basic_block *bbs = get_loop_body_in_dom_order (loop);
+
+  for (i = 0; i < loop->num_nodes; i++)
+    for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+      if ((stmt = gsi_stmt (si))
+	  && gimple_vdef (stmt) && gimple_assign_single_p (stmt)
+	  && (op = gimple_assign_rhs1 (stmt))
+	  && (integer_zerop (op) || real_zerop (op)))
+	VEC_safe_push (gimple, heap, *stmts, stmt);
+
+  free (bbs);
+}
+
 /* For a data reference REF, return the declaration of its base
    address or NULL_TREE if the base is not determined.  */
 
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index eff5348..9e18e26 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -564,6 +564,7 @@  index_in_loop_nest (int var, VEC (loop_p, heap) *loop_nest)
 }
 
 void stores_from_loop (struct loop *, VEC (gimple, heap) **);
+void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **);
 void remove_similar_memory_refs (VEC (gimple, heap) **);
 bool rdg_defs_used_in_other_loops_p (struct graph *, int);
 bool have_similar_memory_accesses (gimple, gimple);
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 099a7fe..5905406 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -1184,18 +1184,36 @@  tree_loop_distribution (void)
     {
       VEC (gimple, heap) *work_list = VEC_alloc (gimple, heap, 3);
 
-      /* With the following working list, we're asking distribute_loop
-	 to separate the stores of the loop: when dependences allow,
-	 it will end on having one store per loop.  */
-      stores_from_loop (loop, &work_list);
-
-      /* A simple heuristic for cache locality is to not split stores
-	 to the same array.  Without this call, an unrolled loop would
-	 be split into as many loops as unroll factor, each loop
-	 storing in the same array.  */
-      remove_similar_memory_refs (&work_list);
-
-      nb_generated_loops = distribute_loop (loop, work_list);
+      /* If both flag_tree_loop_distribute_patterns and
+	 flag_tree_loop_distribution are set, then only
+	 distribute_patterns is executed.  */
+      if (flag_tree_loop_distribute_patterns)
+	{
+	  /* With the following working list, we're asking
+	     distribute_loop to separate from the rest of the loop the
+	     stores of the form "A[i] = 0".  */
+	  stores_zero_from_loop (loop, &work_list);
+
+	  /* Do nothing if there are no patterns to be distributed.  */
+	  if (VEC_length (gimple, work_list) > 0)
+	    nb_generated_loops = distribute_loop (loop, work_list);
+	}
+      else if (flag_tree_loop_distribution)
+	{
+	  /* With the following working list, we're asking
+	     distribute_loop to separate the stores of the loop: when
+	     dependences allow, it will end on having one store per
+	     loop.  */
+	  stores_from_loop (loop, &work_list);
+
+	  /* A simple heuristic for cache locality is to not split
+	     stores to the same array.  Without this call, an unrolled
+	     loop would be split into as many loops as unroll factor,
+	     each loop storing in the same array.  */
+	  remove_similar_memory_refs (&work_list);
+
+	  nb_generated_loops = distribute_loop (loop, work_list);
+	}
 
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
@@ -1217,7 +1235,8 @@  tree_loop_distribution (void)
 static bool
 gate_tree_loop_distribution (void)
 {
-  return flag_tree_loop_distribution != 0;
+  return flag_tree_loop_distribution
+    || flag_tree_loop_distribute_patterns;
 }
 
 struct gimple_opt_pass pass_loop_distribution =