diff mbox series

Fix profile update after RTL unrolling

Message ID ZMK34wjgBF4MnGD7@kam.mff.cuni.cz
State New
Headers show
Series Fix profile update after RTL unrolling | expand

Commit Message

Jan Hubicka July 27, 2023, 6:30 p.m. UTC
This patch fixes the profile update after RTL unrolling, which is now done the
same way as in the tree unroller.  We still produce a (slightly) corrupted
profile for loops with multiple exits; I can try to fix that incrementally.

I also updated testcases to look for profile mismatches so they do not creep
back in again.

Bootstrapped/regtested x86_64-linux, committed.

gcc/ChangeLog:

	* cfgloop.h (single_dom_exit): Declare.
	* cfgloopmanip.h (update_exit_probability_after_unrolling): Declare.
	* cfgrtl.cc (struct cfg_hooks): Fix comment.
	* loop-unroll.cc (unroll_loop_constant_iterations): Update exit edge.
	* tree-ssa-loop-ivopts.h (single_dom_exit): Do not declare it here.
	* tree-ssa-loop-manip.cc (update_exit_probability_after_unrolling):
	Break out from ...
	(tree_transform_and_unroll_loop): ... here.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-prof/peel-1.c: Test for profile mismatches.
	* gcc.dg/tree-prof/unroll-1.c: Test for profile mismatches.
	* gcc.dg/tree-ssa/peel1.c: Test for profile mismatches.
	* gcc.dg/unroll-1.c: Test for profile mismatches.
	* gcc.dg/unroll-3.c: Test for profile mismatches.
	* gcc.dg/unroll-4.c: Test for profile mismatches.
	* gcc.dg/unroll-5.c: Test for profile mismatches.
	* gcc.dg/unroll-6.c: Test for profile mismatches.
diff mbox series

Patch

diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 22293e1c237..c4622d4b853 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -921,6 +921,7 @@  extern bool get_estimated_loop_iterations (class loop *loop, widest_int *nit);
 extern bool get_max_loop_iterations (const class loop *loop, widest_int *nit);
 extern bool get_likely_max_loop_iterations (class loop *loop, widest_int *nit);
 extern int bb_loop_depth (const_basic_block);
+extern edge single_dom_exit (class loop *);
 
 /* Converts VAL to widest_int.  */
 
diff --git a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h
index af6a29f70c4..dab7b31c1e7 100644
--- a/gcc/cfgloopmanip.h
+++ b/gcc/cfgloopmanip.h
@@ -68,5 +68,6 @@  class loop * loop_version (class loop *, void *,
 void adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise);
 void scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
 				     profile_count num, profile_count den);
+void update_exit_probability_after_unrolling (class loop *loop, edge new_exit);
 
 #endif /* GCC_CFGLOOPMANIP_H */
diff --git a/gcc/cfgrtl.cc b/gcc/cfgrtl.cc
index 36e43d0d737..abcb472e2a2 100644
--- a/gcc/cfgrtl.cc
+++ b/gcc/cfgrtl.cc
@@ -5409,7 +5409,7 @@  struct cfg_hooks cfg_layout_rtl_cfg_hooks = {
   rtl_flow_call_edges_add,
   NULL, /* execute_on_growing_pred */
   NULL, /* execute_on_shrinking_pred */
-  duplicate_loop_body_to_header_edge, /* duplicate loop for trees */
+  duplicate_loop_body_to_header_edge, /* duplicate loop for rtl */
   rtl_lv_add_condition_to_bb, /* lv_add_condition_to_bb */
   NULL, /* lv_adjust_loop_header_phi*/
   rtl_extract_cond_bb_edges, /* extract_cond_bb_edges */
diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc
index 93333d8ba11..bbfa6ccc770 100644
--- a/gcc/loop-unroll.cc
+++ b/gcc/loop-unroll.cc
@@ -487,6 +487,7 @@  unroll_loop_constant_iterations (class loop *loop)
   bool exit_at_end = loop_exit_at_end_p (loop);
   struct opt_info *opt_info = NULL;
   bool ok;
+  bool flat = maybe_flat_loop_profile (loop);
 
   niter = desc->niter;
 
@@ -603,9 +604,14 @@  unroll_loop_constant_iterations (class loop *loop)
   ok = duplicate_loop_body_to_header_edge (
     loop, loop_latch_edge (loop), max_unroll, wont_exit, desc->out_edge,
     &remove_edges,
-    DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0));
+    DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0)
+    | (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
   gcc_assert (ok);
 
+  edge new_exit = single_dom_exit (loop);
+  if (new_exit)
+    update_exit_probability_after_unrolling (loop, new_exit);
+
   if (opt_info)
     {
       apply_opt_in_copies (opt_info, max_unroll, true, true);
diff --git a/gcc/profile-count.h b/gcc/profile-count.h
index 88a6431c21a..e860c5db540 100644
--- a/gcc/profile-count.h
+++ b/gcc/profile-count.h
@@ -650,6 +650,9 @@  public:
       return *this;
     }
 
+  /* Compute n-th power.  */
+  profile_probability pow (int) const;
+
   /* Get the value of the count.  */
   uint32_t value () const { return m_val; }
 
diff --git a/gcc/testsuite/gcc.dg/tree-prof/peel-1.c b/gcc/testsuite/gcc.dg/tree-prof/peel-1.c
index 7245b68c1ee..32ecccb16da 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/peel-1.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/peel-1.c
@@ -1,4 +1,4 @@ 
-/* { dg-options "-O3 -fdump-tree-cunroll-details -fno-unroll-loops -fpeel-loops" } */
+/* { dg-options "-O3 -fdump-tree-cunroll-details-blocks -fdump-tree-optimized-details-blocks -fno-unroll-loops -fpeel-loops" } */
 void abort();
 
 int a[1000];
@@ -21,3 +21,5 @@  main()
   return 0;
 }
 /* { dg-final-use { scan-tree-dump "Peeled loop ., 1 times" "cunroll" } } */
+/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "cunroll" } } */
+/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c b/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c
index 3ad0cf019b3..0b25c1f2f1c 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c
@@ -1,4 +1,4 @@ 
-/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details -funroll-loops -fno-peel-loops" } */
+/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops -fno-peel-loops" } */
 void abort ();
 
 int a[1000];
@@ -20,4 +20,5 @@  main()
     t();
   return 0;
 }
-/* { dg-final-use { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
+/* { dg-final-use-not-autofdo { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
+/* { dg-final-use-not-autofdo { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/peel1.c b/gcc/testsuite/gcc.dg/tree-ssa/peel1.c
index dc5848cb5c5..bc136605e94 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/peel1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/peel1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details" } */
+/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details-blocks" } */
 struct foo {int b; int a[3];} foo;
 void add(struct foo *a,int l)
 {
@@ -9,3 +9,4 @@  void add(struct foo *a,int l)
 }
 /* { dg-final { scan-tree-dump "Loop 1 likely iterates at most 2 times." "cunroll"} } */
 /* { dg-final { scan-tree-dump "Peeled loop 1, 3 times." "cunroll"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
diff --git a/gcc/testsuite/gcc.dg/unroll-1.c b/gcc/testsuite/gcc.dg/unroll-1.c
index e7032891823..ff2cbb07b22 100644
--- a/gcc/testsuite/gcc.dg/unroll-1.c
+++ b/gcc/testsuite/gcc.dg/unroll-1.c
@@ -1,7 +1,7 @@ 
 /* PR optimization/8599 */
 /* { dg-do run } */
 /* { dg-options "-O2 -funroll-loops" } */
-/* { dg-options "-mtune=k6 -O2 -funroll-loops" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
+/* { dg-options "-mtune=k6 -O2 -funroll-loops -fdump-rtl-loop2_unroll-details-blocks" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
 
 
 extern void abort (void);
@@ -25,3 +25,5 @@  int main()
     abort ();
   return 0;
 }
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
diff --git a/gcc/testsuite/gcc.dg/unroll-3.c b/gcc/testsuite/gcc.dg/unroll-3.c
index 10bf59b9a2e..fbc8378c73d 100644
--- a/gcc/testsuite/gcc.dg/unroll-3.c
+++ b/gcc/testsuite/gcc.dg/unroll-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */
+/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */
 
 unsigned a[100], b[100];
 inline void bar()
@@ -29,3 +29,4 @@  int foo2(void)
 }
 
 /* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */
diff --git a/gcc/testsuite/gcc.dg/unroll-4.c b/gcc/testsuite/gcc.dg/unroll-4.c
index 17f19421227..055ef3f3545 100644
--- a/gcc/testsuite/gcc.dg/unroll-4.c
+++ b/gcc/testsuite/gcc.dg/unroll-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */
+/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */
 
 unsigned a[100], b[100];
 inline void bar()
@@ -29,3 +29,4 @@  int foo2(void)
 }
 
 /* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */
diff --git a/gcc/testsuite/gcc.dg/unroll-5.c b/gcc/testsuite/gcc.dg/unroll-5.c
index f3bdebe9882..1f22b1fa5d6 100644
--- a/gcc/testsuite/gcc.dg/unroll-5.c
+++ b/gcc/testsuite/gcc.dg/unroll-5.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */
+/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */
 
 unsigned a[100], b[100];
 inline void bar()
@@ -29,3 +29,4 @@  int foo2(void)
 }
 
 /* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */
diff --git a/gcc/testsuite/gcc.dg/unroll-6.c b/gcc/testsuite/gcc.dg/unroll-6.c
index e4c231ea79f..7664bbff109 100644
--- a/gcc/testsuite/gcc.dg/unroll-6.c
+++ b/gcc/testsuite/gcc.dg/unroll-6.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O3 -fdump-rtl-loop2_unroll -funroll-loops" } */
+/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops" } */
 /* { dg-require-effective-target int32plus } */
 
 void abort (void);
@@ -32,3 +32,4 @@  int t2()
 /* { dg-final { scan-rtl-dump-not "realistic bound: 999999" "loop2_unroll" } } */
 /* { dg-final { scan-rtl-dump-times "  upper bound: 2999999" 1 "loop2_unroll" } } */
 /* { dg-final { scan-rtl-dump-times "realistic bound: 2999999" 1 "loop2_unroll" } } */
+/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" { xfail *-*-* } } } */
diff --git a/gcc/tree-ssa-loop-ivopts.h b/gcc/tree-ssa-loop-ivopts.h
index 7a53ce47f10..31ec893b9cb 100644
--- a/gcc/tree-ssa-loop-ivopts.h
+++ b/gcc/tree-ssa-loop-ivopts.h
@@ -20,7 +20,6 @@  along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_TREE_SSA_LOOP_IVOPTS_H
 #define GCC_TREE_SSA_LOOP_IVOPTS_H
 
-extern edge single_dom_exit (class loop *);
 extern void dump_iv (FILE *, struct iv *);
 extern void dump_use (FILE *, struct iv_use *);
 extern void dump_uses (FILE *, struct ivopts_data *);
diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc
index 8e3b1057b6f..e58892e235c 100644
--- a/gcc/tree-ssa-loop-manip.cc
+++ b/gcc/tree-ssa-loop-manip.cc
@@ -1040,6 +1040,29 @@  determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
   *exit_bound = bound;
 }
 
+/* Update NEW_EXIT probability after loop has been unrolled.  */
+
+void
+update_exit_probability_after_unrolling (class loop *loop, edge new_exit)
+{
+  /* gimple_duplicate_loop_body_to_header_edge depending on
+     DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
+     or scales it down accordingly.
+     However exit edge probability is kept as original.  Fix it if needed
+     and compensate.  */
+  profile_probability new_prob
+	  = loop_preheader_edge
+		  (loop)->count ().probability_in (new_exit->src->count);
+  if (!(new_prob == new_exit->probability))
+    {
+      profile_count old_count = new_exit->src->count - new_exit->count ();
+      set_edge_probability_and_rescale_others (new_exit, new_prob);
+      profile_count new_count = new_exit->src->count - new_exit->count ();
+      scale_dominated_blocks_in_loop (loop, new_exit->src,
+				      new_count, old_count);
+    }
+}
+
 /* Unroll LOOP FACTOR times.  LOOP is known to have a single exit edge
    whose source block dominates the latch.  DESC describes the number of
    iterations of LOOP.
@@ -1266,23 +1289,7 @@  tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
   update_ssa (TODO_update_ssa);
 
   new_exit = single_dom_exit (loop);
-
-  /* gimple_duplicate_loop_body_to_header_edge depending on
-     DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
-     or scales it down accordingly.
-     However exit edge probability is kept as original.  Fix it if needed
-     and compensate.  */
-  profile_probability new_prob
-	  = loop_preheader_edge
-		  (loop)->count ().probability_in (new_exit->src->count);
-  if (!(new_prob == new_exit->probability))
-    {
-      profile_count old_count = new_exit->src->count - new_exit->count ();
-      set_edge_probability_and_rescale_others (new_exit, new_prob);
-      profile_count new_count = new_exit->src->count - new_exit->count ();
-      scale_dominated_blocks_in_loop (loop, new_exit->src,
-				      new_count, old_count);
-    }
+  update_exit_probability_after_unrolling (loop, new_exit);
   if (!single_loop_p)
     {
       /* Finally create the new counter for number of iterations and add