diff mbox series

Fix profile update in tree_transform_and_unroll_loop

Message ID ZMJ9eeM5DRrlLsTv@kam.mff.cuni.cz
State New
Headers show
Series Fix profile update in tree_transform_and_unroll_loop | expand

Commit Message

Jan Hubicka July 27, 2023, 2:21 p.m. UTC
Hi,
This patch fixes profile update in tree_transform_and_unroll_loop which is used
by predictive comming.  I stared by attempt to fix
gcc.dg/tree-ssa/update-unroll-1.c I xfailed last week, but it turned to be
harder job.

Unrolling was never fixed for changes in duplicate_loop_body_to_header_edge
which is now smarter on getting profile right when some exists are eliminated.
A lot of manual profile can thus now be done using existing infrastructure.

I also noticed that scale_dominated_blocks_in_loop does job identical
to loop I wrote in scale_loop_profile and thus I commonized the implementaiton
and removed recursion.

I also extended duplicate_loop_body_to_header_edge to handle flat profiles same
way as we do in vectorizer. Without it we end up with less then 0 iteration
count in gcc.dg/tree-ssa/update-unroll-1.c (it is unrolled 32times but predicted
to iterated fewer times) and added missing code to update loop_info.

Bootstrapped/regtested x86_64-linux, comitted.

gcc/ChangeLog:

	* cfgloopmanip.cc (scale_dominated_blocks_in_loop): Move here from
	tree-ssa-loop-manip.cc and avoid recursion.
	(scale_loop_profile): Use scale_dominated_blocks_in_loop.
	(duplicate_loop_body_to_header_edge): Add DLTHE_FLAG_FLAT_PROFILE
	flag.
	* cfgloopmanip.h (DLTHE_FLAG_FLAT_PROFILE): Define.
	(scale_dominated_blocks_in_loop): Declare.
	* predict.cc (dump_prediction): Do not ICE on uninitialized probability.
	(change_edge_frequency): Remove.
	* predict.h (change_edge_frequency): Remove.
	* tree-ssa-loop-manip.cc (scale_dominated_blocks_in_loop): Move to
	cfgloopmanip.cc.
	(niter_for_unrolled_loop): Remove.
	(tree_transform_and_unroll_loop): Fix profile update.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr102385.c: Check for no profile mismatches.
	* gcc.dg/pr96931.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-1.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-2.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-3.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-4.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-5.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-7.c: Check for one profile mismatch.
	* gcc.dg/tree-ssa/predcom-8.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-1.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-10.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-11.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-12.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-2.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-3.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-4.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-5.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-6.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-7.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-8.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/predcom-dse-9.c: Check for no profile mismatches.
	* gcc.dg/tree-ssa/update-unroll-1.c: Unxfail.
diff mbox series

Patch

diff --git a/gcc/cfgloopmanip.cc b/gcc/cfgloopmanip.cc
index 3012a8d60f7..c3d292d0dd4 100644
--- a/gcc/cfgloopmanip.cc
+++ b/gcc/cfgloopmanip.cc
@@ -499,6 +499,32 @@  scale_loop_frequencies (class loop *loop, profile_probability p)
   free (bbs);
 }
 
+/* Scales the frequencies of all basic blocks in LOOP that are strictly
+   dominated by BB by NUM/DEN.  */
+
+void
+scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
+				profile_count num, profile_count den)
+{
+  basic_block son;
+
+  if (!den.nonzero_p () && !(num == profile_count::zero ()))
+    return;
+  auto_vec <basic_block, 8> worklist;
+  worklist.safe_push (bb);
+
+  while (!worklist.is_empty ())
+    for (son = first_dom_son (CDI_DOMINATORS, worklist.pop ());
+	 son;
+	 son = next_dom_son (CDI_DOMINATORS, son))
+      {
+	if (!flow_bb_inside_loop_p (loop, son))
+	  continue;
+	son->count = son->count.apply_scale (num, den);
+	worklist.safe_push (son);
+      }
+}
+
 /* Scale profile in LOOP by P.
    If ITERATION_BOUND is not -1, scale even further if loop is predicted
    to iterate too many times.
@@ -649,19 +675,9 @@  scale_loop_profile (class loop *loop, profile_probability p,
       if (other_edge && other_edge->dest == loop->latch)
 	loop->latch->count -= new_exit_count - old_exit_count;
       else
-	{
-	  basic_block *body = get_loop_body (loop);
-	  profile_count new_count = exit_edge->src->count - new_exit_count;
-	  profile_count old_count = exit_edge->src->count - old_exit_count;
-
-	  for (unsigned int i = 0; i < loop->num_nodes; i++)
-	    if (body[i] != exit_edge->src
-		&& dominated_by_p (CDI_DOMINATORS, body[i], exit_edge->src))
-	      body[i]->count = body[i]->count.apply_scale (new_count,
-							   old_count);
-
-	  free (body);
-	}
+	scale_dominated_blocks_in_loop (loop, exit_edge->src,
+					exit_edge->src->count - new_exit_count,
+					exit_edge->src->count - old_exit_count);
     }
   else if (dump_file && (dump_flags & TDF_DETAILS))
     {
@@ -1237,6 +1253,7 @@  duplicate_loop_body_to_header_edge (class loop *loop, edge e,
 	     should've managed the flags so all except for original loop
 	     has won't exist set.  */
 	  scale_act = wanted_count.probability_in (count_in);
+
 	  /* Now simulate the duplication adjustments and compute header
 	     frequency of the last copy.  */
 	  for (i = 0; i < ndupl; i++)
@@ -1252,16 +1269,21 @@  duplicate_loop_body_to_header_edge (class loop *loop, edge e,
 	  profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
 							? prob_pass_wont_exit
 							: prob_pass_thru;
-	  profile_probability p = prob_pass_main;
-	  profile_count scale_main_den = count_in;
-	  for (i = 0; i < ndupl; i++)
+	  if (!(flags & DLTHE_FLAG_FLAT_PROFILE))
 	    {
-	      scale_main_den += count_in.apply_probability (p);
-	      p = p * scale_step[i];
+	      profile_probability p = prob_pass_main;
+	      profile_count scale_main_den = count_in;
+	      for (i = 0; i < ndupl; i++)
+		{
+		  scale_main_den += count_in.apply_probability (p);
+		  p = p * scale_step[i];
+		}
+	      /* If original loop is executed COUNT_IN times, the unrolled
+		 loop will account SCALE_MAIN_DEN times.  */
+	      scale_main = count_in.probability_in (scale_main_den);
 	    }
-	  /* If original loop is executed COUNT_IN times, the unrolled
-	     loop will account SCALE_MAIN_DEN times.  */
-	  scale_main = count_in.probability_in (scale_main_den);
+	  else
+	    scale_main = profile_probability::always ();
 	  scale_act = scale_main * prob_pass_main;
 	}
       else
diff --git a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h
index 75b2a5e9b75..af6a29f70c4 100644
--- a/gcc/cfgloopmanip.h
+++ b/gcc/cfgloopmanip.h
@@ -32,6 +32,8 @@  enum
 					   field of newly create BB.  */
 #define DLTHE_FLAG_COMPLETTE_PEEL 4	/* Update frequencies expecting
 					   a complete peeling.  */
+#define DLTHE_FLAG_FLAT_PROFILE 8	/* Profile is flat; do not reduce
+					   count by unroll factor.  */
 extern edge mfb_kj_edge;
 
 extern bool remove_path (edge, bool * = NULL, bitmap = NULL);
@@ -64,5 +66,7 @@  class loop * loop_version (class loop *, void *,
 			    profile_probability, profile_probability,
 			    profile_probability, profile_probability, bool);
 void adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise);
+void scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
+				     profile_count num, profile_count den);
 
 #endif /* GCC_CFGLOOPMANIP_H */
diff --git a/gcc/predict.cc b/gcc/predict.cc
index 6777e6cb9c5..5a1a561cc24 100644
--- a/gcc/predict.cc
+++ b/gcc/predict.cc
@@ -790,7 +790,7 @@  dump_prediction (FILE *file, enum br_predictor predictor, int probability,
     {
       fprintf (file, "  exec ");
       bb->count.dump (file);
-      if (e)
+      if (e && e->count ().initialized_p () && bb->count.to_gcov_type ())
 	{
 	  fprintf (file, " hit ");
 	  e->count ().dump (file);
@@ -4634,43 +4634,6 @@  force_edge_cold (edge e, bool impossible)
     }
 }
 
-/* Change E's probability to NEW_E_PROB, redistributing the probabilities
-   of other outgoing edges proportionally.
-
-   Note that this function does not change the profile counts of any
-   basic blocks.  The caller must do that instead, using whatever
-   information it has about the region that needs updating.  */
-
-void
-change_edge_frequency (edge e, profile_probability new_e_prob)
-{
-  profile_probability old_e_prob = e->probability;
-  profile_probability old_other_prob = old_e_prob.invert ();
-  profile_probability new_other_prob = new_e_prob.invert ();
-
-  e->probability = new_e_prob;
-  profile_probability cumulative_prob = new_e_prob;
-
-  unsigned int num_other = EDGE_COUNT (e->src->succs) - 1;
-  edge other_e;
-  edge_iterator ei;
-  FOR_EACH_EDGE (other_e, ei, e->src->succs)
-    if (other_e != e)
-      {
-	num_other -= 1;
-	if (num_other == 0)
-	  /* Ensure that the probabilities add up to 1 without
-	     rounding error.  */
-	  other_e->probability = cumulative_prob.invert ();
-	else
-	  {
-	    other_e->probability /= old_other_prob;
-	    other_e->probability *= new_other_prob;
-	    cumulative_prob += other_e->probability;
-	  }
-      }
-}
-
 #if CHECKING_P
 
 namespace selftest {
diff --git a/gcc/predict.h b/gcc/predict.h
index 4864b7d7113..42b7c00c46a 100644
--- a/gcc/predict.h
+++ b/gcc/predict.h
@@ -100,7 +100,6 @@  extern void rebuild_frequencies (void);
 extern void report_predictor_hitrates (void);
 extern void force_edge_cold (edge, bool);
 extern void propagate_unlikely_bbs_forward (void);
-extern void change_edge_frequency (edge, profile_probability);
 
 extern void add_reg_br_prob_note (rtx_insn *, profile_probability);
 
diff --git a/gcc/testsuite/gcc.dg/pr102385.c b/gcc/testsuite/gcc.dg/pr102385.c
index 1339540c722..bdccc9e43b1 100644
--- a/gcc/testsuite/gcc.dg/pr102385.c
+++ b/gcc/testsuite/gcc.dg/pr102385.c
@@ -1,4 +1,4 @@ 
-/* { dg-options "-Wall -Wextra -O2 -fno-toplevel-reorder -fno-tree-ch -fno-tree-dce -fno-tree-dominator-opts -fno-tree-dse -fno-tree-loop-ivcanon -fpredictive-commoning" } */
+/* { dg-options "-Wall -Wextra -O2 -fno-toplevel-reorder -fno-tree-ch -fno-tree-dce -fno-tree-dominator-opts -fno-tree-dse -fno-tree-loop-ivcanon -fpredictive-commoning -fdump-tree-pcom-details-blocks -fdump-tree-lim-details-blocks" } */
 
 short a, b;
 int c[9];
@@ -12,3 +12,5 @@  void e() {
   }
 }
 int main() {return 0;}
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "lim2" } } */
diff --git a/gcc/testsuite/gcc.dg/pr96931.c b/gcc/testsuite/gcc.dg/pr96931.c
index 94b8a1128ee..660391588ff 100644
--- a/gcc/testsuite/gcc.dg/pr96931.c
+++ b/gcc/testsuite/gcc.dg/pr96931.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O1 -fpredictive-commoning -fno-tree-loop-im" } */
+/* { dg-options "-O1 -fpredictive-commoning -fno-tree-loop-im -fdump-tree-pcom-details-blocks" } */
 
 int bl;
 
@@ -17,3 +17,4 @@  ie (void)
 
   ie ();
 }
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-1.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-1.c
index 8c3d9a4fc58..d0a5318e351 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-1.c
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-tree-vectorize -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 void abort (void);
 
@@ -47,3 +47,4 @@  int main(void)
 
 /* Also check that we undid the transformation previously made by PRE.  */
 /* { dg-final { scan-tree-dump-times "looparound ref" 1 "pcom" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-2.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-2.c
index 1c54679c022..f19edd4cd74 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details -fno-tree-pre" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks -fno-tree-pre" } */
 /* { dg-additional-options "-fno-tree-vectorize" { target amdgcn-*-* } } */
 
 void abort (void);
@@ -44,3 +44,4 @@  int main(void)
 
 /* Verify that both loops were transformed and unrolled.  */
 /* { dg-final { scan-tree-dump-times "Unrolling 2 times." 2 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-3.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-3.c
index 9abbe6c1380..aaee3b7ff9e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details -fno-tree-pre -fno-tree-loop-vectorize" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks -fno-tree-pre -fno-tree-loop-vectorize" } */
 
 int a[1000], b[1000];
 
@@ -13,3 +13,4 @@  void test(void)
 
 /* Verify that we used 3 temporary variables for the loop.  */
 /* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-4.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-4.c
index 382a464cef7..af9ae0e0f3d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 /* Test for predictive commoning of expressions, without reassociation.  */
 
@@ -26,3 +26,4 @@  int main(void)
 
 /* { dg-final { scan-tree-dump-times "Combination" 1 "pcom"} } */
 /* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-5.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-5.c
index a3ee1d946a7..52adb59d669 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-5.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -funroll-loops --param max-unroll-times=8 -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 /* Test for predictive commoning of expressions, with reassociation.  */
 
@@ -26,3 +26,4 @@  int main(void)
 
 /* { dg-final { scan-tree-dump-times "Combination" 2 "pcom"} } */
 /* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-7.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-7.c
index 683fb9b5d35..99939767562 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-7.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-7.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O3 -fdump-tree-pcom-details" } */
+/* { dg-options "-O3 -fdump-tree-pcom-details-blocks" } */
 
 int b, f, d[5][2];
 unsigned int c;
@@ -15,3 +15,7 @@  main ()
 }
 
 /* { dg-final { scan-tree-dump "Executing predictive commoning" "pcom" } } */
+/* dom pass introduces one mismatch after simplfying mispredicted conditional
+   on c being non-zero on first iteration.  This happens since c is global variable
+   and needs alias analysis.  */
+/* { dg-final { scan-tree-dump-times "Invalid sum" 1 "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-8.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-8.c
index c4562768398..dcddf573145 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-8.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-8.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O3 -fdump-tree-pcom-details" } */
+/* { dg-options "-O3 -fdump-tree-pcom-details-blocks" } */
 
 int is_sorted(int *a, int n)
 {
@@ -10,3 +10,4 @@  int is_sorted(int *a, int n)
 }
 
 /* { dg-final { scan-tree-dump "Executing predictive commoning without unrolling" "pcom" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c
index b61b651fca8..a0a04a08c61 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-1.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -60,3 +60,4 @@  int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-10.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-10.c
index bd5575d9502..f770a8ad812 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-10.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-10.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -41,4 +41,4 @@  int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump-not "Store-stores chain" "pcom"} } */
-
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-11.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-11.c
index 9e496f68a12..ed2b96a0d1a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-11.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-11.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int x[105] = {2, 3, 5, 7, 11};
@@ -48,4 +48,5 @@  int main (void)
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
 /* { dg-final { scan-tree-dump "Store-loads chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-12.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-12.c
index 510c600f574..2487c1c8205 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-12.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-12.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -65,3 +65,4 @@  int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c
index 7f959a833f4..020ca705790 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-2.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -60,3 +60,4 @@  int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c
index 1fc8f089345..667cc333d9f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-3.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-tree-vectorize -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-tree-vectorize -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr1[105] = {2, 3, 5, 7, 11, 13, 0};
 int arr2[105] = {2, 3, 5, 7, 11, 13, 0};
@@ -106,3 +106,4 @@  int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump-times "Store-stores chain" 4 "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c
index f97d32f4b41..8118461af0b 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-4.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -59,3 +59,4 @@  int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-5.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-5.c
index a13d56098be..03fa646661e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-5.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11};
 int result0[10] = {2, 3, 5, 7, 11};
@@ -61,3 +61,4 @@  int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-6.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-6.c
index 63d6c8f33b0..ab2fd403d30 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-6.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11, 13, 17, 19};
 int result0[10] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -63,3 +63,4 @@  int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-7.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-7.c
index 0bde6e6dced..c746ebd7155 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-7.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-7.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11, 13, 17, 19};
 int result0[10] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -61,3 +61,4 @@  int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-8.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-8.c
index 45ffd25c424..6c4e9afa487 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-8.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-8.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr[105] = {2, 3, 5, 7, 11, 13, 17, 19};
 int result0[10] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -58,3 +58,4 @@  int main (void)
 
   return 0;
 }
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-9.c b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-9.c
index 1c4e3140309..9c5e8ca9a79 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-9.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/predcom-dse-9.c
@@ -1,5 +1,5 @@ 
 /* { dg-do run } */
-/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */
+/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details-blocks" } */
 
 int arr1[105] = {2, 3, 5, 7, 11, 13, 17, 19};
 int arr2[105] = {2, 3, 5, 7, 11, 13, 17, 19};
@@ -88,3 +88,4 @@  int main (void)
   return 0;
 }
 /* { dg-final { scan-tree-dump "Store-stores chain" "pcom"} } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "pcom" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c b/gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c
index 138448bac43..23dd5c0d570 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c
@@ -16,5 +16,4 @@  int foo(unsigned n)
 /* We used to make the probability that the body of the loop (unrolled
    to enable prefetching) is entered 0, which is not correct.  */
 
-/* { dg-final { scan-tree-dump-not "Invalid sum" "aprefetch" { xfail *-*-* }} } */
 /* { dg-final { scan-tree-dump-not "SUCC: 7 .100.0%" "aprefetch"} } */
diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc
index b2d3dcf406d..8e3b1057b6f 100644
--- a/gcc/tree-ssa-loop-manip.cc
+++ b/gcc/tree-ssa-loop-manip.cc
@@ -1040,71 +1040,6 @@  determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
   *exit_bound = bound;
 }
 
-/* Scales the frequencies of all basic blocks in LOOP that are strictly
-   dominated by BB by NUM/DEN.  */
-
-static void
-scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
-				profile_count num, profile_count den)
-{
-  basic_block son;
-
-  if (!den.nonzero_p () && !(num == profile_count::zero ()))
-    return;
-
-  for (son = first_dom_son (CDI_DOMINATORS, bb);
-       son;
-       son = next_dom_son (CDI_DOMINATORS, son))
-    {
-      if (!flow_bb_inside_loop_p (loop, son))
-	continue;
-      scale_bbs_frequencies_profile_count (&son, 1, num, den);
-      scale_dominated_blocks_in_loop (loop, son, num, den);
-    }
-}
-
-/* Return estimated niter for LOOP after unrolling by FACTOR times.  */
-
-gcov_type
-niter_for_unrolled_loop (class loop *loop, unsigned factor)
-{
-  gcc_assert (factor != 0);
-  bool profile_p = false;
-  gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p);
-  /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the
-     "+ 1" converts latch iterations to loop iterations and the "- 1"
-     converts back.  */
-  gcov_type new_est_niter = est_niter / factor;
-
-  if (est_niter == -1)
-    return -1;
-
-  /* Without profile feedback, loops for which we do not know a better estimate
-     are assumed to roll 10 times.  When we unroll such loop, it appears to
-     roll too little, and it may even seem to be cold.  To avoid this, we
-     ensure that the created loop appears to roll at least 5 times (but at
-     most as many times as before unrolling).  Don't do adjustment if profile
-     feedback is present.  */
-  if (new_est_niter < 5 && !profile_p)
-    {
-      if (est_niter < 5)
-	new_est_niter = est_niter;
-      else
-	new_est_niter = 5;
-    }
-
-  if (loop->any_upper_bound)
-    {
-      /* As above, this is really CEIL (upper_bound + 1, factor) - 1.  */
-      widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound,
-					 factor);
-      if (wi::ltu_p (bound, new_est_niter))
-	new_est_niter = bound.to_uhwi ();
-    }
-
-  return new_est_niter;
-}
-
 /* Unroll LOOP FACTOR times.  LOOP is known to have a single exit edge
    whose source block dominates the latch.  DESC describes the number of
    iterations of LOOP.
@@ -1169,47 +1104,39 @@  tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
 				transform_callback transform,
 				void *data)
 {
-  gcov_type new_est_niter = niter_for_unrolled_loop (loop, factor);
   unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
 
   enum tree_code exit_cmp;
   tree enter_main_cond, exit_base, exit_step, exit_bound;
+  bool flat = maybe_flat_loop_profile (loop);
   determine_exit_conditions (loop, desc, factor,
 			     &enter_main_cond, &exit_base, &exit_step,
 			     &exit_cmp, &exit_bound);
   bool single_loop_p = !exit_base;
 
-  /* Let us assume that the unrolled loop is quite likely to be entered.  */
-  profile_probability prob_entry;
-  if (integer_nonzerop (enter_main_cond))
-    prob_entry = profile_probability::always ();
-  else
-    prob_entry = profile_probability::guessed_always ()
-			.apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
-
   gcond *exit_if = nullptr;
   class loop *new_loop = nullptr;
   edge new_exit;
   if (!single_loop_p)
     {
-      edge exit = single_dom_exit (loop);
+      profile_count entry_count = loop_preheader_edge (loop)->src->count;
+      /* Let us assume that the unrolled loop is quite likely to be entered.  */
+      profile_probability prob_entry;
+      if (integer_nonzerop (enter_main_cond))
+	prob_entry = profile_probability::always ();
+      else
+	prob_entry = profile_probability::guessed_always ()
+			    .apply_scale (PROB_UNROLLED_LOOP_ENTERED, 100);
+
 
       /* The values for scales should keep profile consistent, and somewhat
-	 close to correct.
-
-	 TODO: The current value of SCALE_REST makes it appear that the loop
-	 that is created by splitting the remaining iterations of the unrolled
-	 loop is executed the same number of times as the original loop, and
-	 with the same frequencies, which is obviously wrong.  This does not
-	 appear to cause problems, so we do not bother with fixing it for now.
-	 To make the profile correct, we would need to change the probability
-	 of the exit edge of the loop, and recompute the distribution of
-	 frequencies in its body because of this change (scale the frequencies
-	 of blocks before and after the exit by appropriate factors).  */
-      profile_probability scale_unrolled = prob_entry;
+	 close to correct.  */
       new_loop = loop_version (loop, enter_main_cond, NULL, prob_entry,
-			       prob_entry.invert (), scale_unrolled,
-			       profile_probability::guessed_always (),
+			       prob_entry.invert (),
+			       prob_entry,
+			       /* We will later redirect exit from vectorized
+				  loop to new_loop.  */
+			       profile_probability::always (),
 			       true);
       gcc_assert (new_loop != NULL);
       update_ssa (TODO_update_ssa_no_phi);
@@ -1220,18 +1147,16 @@  tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
       edge precond_edge = single_pred_edge (rest);
       split_edge (loop_latch_edge (loop));
       basic_block exit_bb = single_pred (loop->latch);
+      edge exit = single_dom_exit (loop);
 
       /* Since the exit edge will be removed, the frequency of all the blocks
-	 in the loop that are dominated by it must be scaled by
-	 1 / (1 - exit->probability).  */
+	 in the loop that are dominated by it must be scaled.  */
       if (exit->probability.initialized_p ())
 	scale_dominated_blocks_in_loop (loop, exit->src,
 					/* We are scaling up here so
 					   probability does not fit.  */
-					loop->header->count,
-					loop->header->count
-					- loop->header->count.apply_probability
-					    (exit->probability));
+					exit->src->count,
+					exit->src->count - exit->count ());
 
       gimple_stmt_iterator bsi = gsi_last_bb (exit_bb);
       exit_if = gimple_build_cond (EQ_EXPR, integer_zero_node,
@@ -1243,14 +1168,14 @@  tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
       rescan_loop_exit (new_exit, true, false);
 
       /* Set the probability of new exit to the same of the old one.  Fix
-	 the frequency of the latch block, by scaling it back by
-	 1 - exit->probability.  */
+	 the count of the latch block.  */
       new_exit->probability = exit->probability;
       edge new_nonexit = single_pred_edge (loop->latch);
       new_nonexit->probability = exit->probability.invert ();
       new_nonexit->flags = EDGE_TRUE_VALUE;
-      if (new_nonexit->probability.initialized_p ())
-	scale_bbs_frequencies (&loop->latch, 1, new_nonexit->probability);
+      set_edge_probability_and_rescale_others
+	      (exit, profile_probability::never ());
+      loop->latch->count = new_nonexit->count ();
 
       edge old_entry = loop_preheader_edge (loop);
       edge new_entry = loop_preheader_edge (new_loop);
@@ -1296,12 +1221,21 @@  tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
 	}
 
       remove_path (exit);
+      /* We will later redirect exit from vectorized loop to new_loop.  */
+      loop_preheader_edge (new_loop)->src->count = entry_count;
 
       /* The epilog loop latch executes at most factor - 1 times.
 	 Since the epilog is entered unconditionally it will need to handle
 	 up to factor executions of its body.  */
-      new_loop->any_upper_bound = 1;
+      new_loop->any_upper_bound = true;
       new_loop->nb_iterations_upper_bound = factor - 1;
+      /* We do not really know estimate on number of iterations, since we do not
+	 track any estimates modulo unroll factor.
+	 Drop estimate from loop_info and scale loop profile.
+	 It may be more realistic to scale loop profile to factor / 2 - 1,
+	 but vectorizer also uses factor - 1.  */
+      new_loop->any_estimate = false;
+      scale_loop_profile (new_loop, profile_probability::always (), factor - 1);
     }
   else
     new_exit = single_dom_exit (loop);
@@ -1318,10 +1252,10 @@  tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
 
   auto_vec<edge> to_remove;
   bool ok
-    = gimple_duplicate_loop_body_to_header_edge (loop, loop_latch_edge (loop),
-						 factor - 1, wont_exit,
-						 new_exit, &to_remove,
-						 DLTHE_FLAG_UPDATE_FREQ);
+    = gimple_duplicate_loop_body_to_header_edge
+	    (loop, loop_latch_edge (loop), factor - 1, wont_exit,
+	     new_exit, &to_remove,
+	     DLTHE_FLAG_UPDATE_FREQ | (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
   gcc_assert (ok);
 
   for (edge e : to_remove)
@@ -1332,36 +1266,25 @@  tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
   update_ssa (TODO_update_ssa);
 
   new_exit = single_dom_exit (loop);
+
+  /* gimple_duplicate_loop_body_to_header_edge depending on
+     DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
+     or scales it down accordingly.
+     However exit edge probability is kept as original.  Fix it if needed
+     and compensate.  */
+  profile_probability new_prob
+	  = loop_preheader_edge
+		  (loop)->count ().probability_in (new_exit->src->count);
+  if (!(new_prob == new_exit->probability))
+    {
+      profile_count old_count = new_exit->src->count - new_exit->count ();
+      set_edge_probability_and_rescale_others (new_exit, new_prob);
+      profile_count new_count = new_exit->src->count - new_exit->count ();
+      scale_dominated_blocks_in_loop (loop, new_exit->src,
+				      new_count, old_count);
+    }
   if (!single_loop_p)
     {
-      /* Ensure that the frequencies in the loop match the new estimated
-	 number of iterations, and change the probability of the new
-	 exit edge.  */
-
-      profile_count freq_h = loop->header->count;
-      profile_count freq_e = (loop_preheader_edge (loop))->count ();
-      if (freq_h.nonzero_p ())
-	{
-	  /* Avoid dropping loop body profile counter to 0 because of zero
-	     count in loop's preheader.  */
-	  if (freq_h.nonzero_p () && !(freq_e == profile_count::zero ()))
-	    freq_e = freq_e.force_nonzero ();
-	  scale_loop_frequencies (loop, freq_e.probability_in (freq_h));
-	}
-
-      basic_block rest = new_exit->dest;
-      new_exit->probability
-	= (profile_probability::always () / (new_est_niter + 1));
-
-      rest->count += new_exit->count ();
-
-      edge new_nonexit = single_pred_edge (loop->latch);
-      profile_probability prob = new_nonexit->probability;
-      new_nonexit->probability = new_exit->probability.invert ();
-      prob = new_nonexit->probability / prob;
-      if (prob.initialized_p ())
-	scale_bbs_frequencies (&loop->latch, 1, prob);
-
       /* Finally create the new counter for number of iterations and add
 	 the new exit instruction.  */
       tree ctr_before, ctr_after;
@@ -1374,66 +1297,15 @@  tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
       gimple_cond_set_rhs (exit_if, exit_bound);
       update_stmt (exit_if);
     }
-  else
-    {
-      /* gimple_duplicate_loop_to_header_edge has adjusted the loop body's
-	 original profile counts in line with the unroll factor.  However,
-	 the old counts might not have been consistent with the old
-	 iteration count.
-
-	 Therefore, if the iteration count is known exactly, make sure that the
-	 profile counts of the loop header (and any other blocks that might be
-	 executed in the final iteration) are consistent with the combination
-	 of (a) the incoming profile count and (b) the new iteration count.  */
-      profile_count in_count = loop_preheader_edge (loop)->count ();
-      profile_count old_header_count = loop->header->count;
-      if (in_count.nonzero_p ()
-	  && old_header_count.nonzero_p ()
-	  && TREE_CODE (desc->niter) == INTEGER_CST)
-	{
-	  /* The + 1 converts latch counts to iteration counts.  */
-	  profile_count new_header_count = in_count * (new_est_niter + 1);
-	  basic_block *body = get_loop_body (loop);
-	  scale_bbs_frequencies_profile_count (body, loop->num_nodes,
-					       new_header_count,
-					       old_header_count);
-	  free (body);
-	}
-
-      /* gimple_duplicate_loop_to_header_edge discarded FACTOR - 1
-	 exit edges and adjusted the loop body's profile counts for the
-	 new probabilities of the remaining non-exit edges.  However,
-	 the remaining exit edge still has the same probability as it
-	 did before, even though it is now more likely.
-
-	 Therefore, all blocks executed after a failed exit test now have
-	 a profile count that is too high, and the sum of the profile counts
-	 for the header's incoming edges is greater than the profile count
-	 of the header itself.
-
-	 Adjust the profile counts of all code in the loop body after
-	 the exit test so that the sum of the counts on entry to the
-	 header agree.  */
-      profile_count old_latch_count = loop_latch_edge (loop)->count ();
-      profile_count new_latch_count = loop->header->count - in_count;
-      if (old_latch_count.nonzero_p () && new_latch_count.nonzero_p ())
-	scale_dominated_blocks_in_loop (loop, new_exit->src, new_latch_count,
-					old_latch_count);
-
-      /* Set the probability of the exit edge based on NEW_EST_NITER
-	 (which estimates latch counts rather than iteration counts).
-	 Update the probabilities of other edges to match.
-
-	 If the profile counts are large enough to give the required
-	 precision, the updates above will have made
-
-	    e->dest->count / e->src->count ~= new e->probability
-
-	 for every outgoing edge e of NEW_EXIT->src.  */
-      profile_probability new_exit_prob
-	= profile_probability::always () / (new_est_niter + 1);
-      change_edge_frequency (new_exit, new_exit_prob);
-    }
+  if (loop->any_upper_bound)
+    loop->nb_iterations_upper_bound = wi::udiv_floor
+		(loop->nb_iterations_upper_bound + 1, factor) - 1;
+  if (loop->any_likely_upper_bound)
+    loop->nb_iterations_likely_upper_bound = wi::udiv_floor
+		(loop->nb_iterations_likely_upper_bound + 1, factor) - 1;
+  if (loop->any_estimate)
+    loop->nb_iterations_estimate = wi::udiv_floor
+		(loop->nb_iterations_estimate + 1, factor) - 1;
 
   checking_verify_flow_info ();
   checking_verify_loop_structure ();