diff mbox series

profile-count: Avoid overflows into uninitialized [PR112303]

Message ID ZgVM33rUwO4L1f3o@tucnak
State New
Headers show
Series profile-count: Avoid overflows into uninitialized [PR112303] | expand

Commit Message

Jakub Jelinek March 28, 2024, 10:56 a.m. UTC
Hi!

The testcase in the patch ICEs with

	Jakub

Comments

Jan Hubicka March 28, 2024, 1:07 p.m. UTC | #1
> __attribute__((noipa)) void
> bar (void)
> {
>   __builtin_exit (0);
> }
> 
> __attribute__((noipa)) void
> foo (void)
> {
>   for (int i = 0; i < 1000; ++i)
>   for (int j = 0; j < 1000; ++j)
>   for (int k = 0; k < 1000; ++k)
>   for (int l = 0; l < 1000; ++l)
>   for (int m = 0; m < 1000; ++m)
>   for (int n = 0; n < 1000; ++n)
>   for (int o = 0; o < 1000; ++o)
>   for (int p = 0; p < 1000; ++p)
>   for (int q = 0; q < 1000; ++q)
>   for (int r = 0; r < 1000; ++r)
>   for (int s = 0; s < 1000; ++s)
>   for (int t = 0; t < 1000; ++t)
>   for (int u = 0; u < 1000; ++u)
>   for (int v = 0; v < 1000; ++v)
>   for (int w = 0; w < 1000; ++w)
>   for (int x = 0; x < 1000; ++x)
>   for (int y = 0; y < 1000; ++y)
>   for (int z = 0; z < 1000; ++z)
>   for (int a = 0; a < 1000; ++a)
>   for (int b = 0; b < 1000; ++b)
>     bar ();
> }
> 
> int
> main ()
> {
>   foo ();
> }
> reaches the maximum count already on the 11th loop.

This should not happen - this is a reason why we estimate in sreals and
convert to profile_counts only later.  In this case we should push down
the profile_count of entry block (to 0)

  freq_max = 0;
  FOR_EACH_BB_FN (bb, cfun)
    if (freq_max < BLOCK_INFO (bb)->frequency)
      freq_max = BLOCK_INFO (bb)->frequency;

  /* Scaling frequencies up to maximal profile count may result in
     frequent overflows especially when inlining loops.
     Small scalling results in unnecesary precision loss.  Stay in
     the half of the (exponential) range.  */
  freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max;
  if (freq_max < 16)
    freq_max = 16;

I am looking on what goes wrong here.
Honza
Jan Hubicka March 28, 2024, 1:45 p.m. UTC | #2
Hi,
so what goes wrong with the testcase is the fact that after recursive
inlining we have a large loop nest and consequently an invalid profile, since
every loop is predicted to iterate quite a lot.  Rebuild_frequencies
should take care of the problem, but it doesn't since there is:
      if (freq_max < 16)
	freq_max = 16;
Removing this check solves the testcase.  Looking how it went in, I made
it in 2017 when dropping the original code to scale into range 0...10000
https://gcc.gnu.org/pipermail/gcc-patches/2017-November/488115.html

I have no recollection of inventing that check, but I suppose one can
argue that we do not want to scale most of the CFG to 0, since the branch
prediction is likely wrong and we do not know if the code with
unrealistic BB profile is important at all.  So perhaps it is safer to
cap rather than scale most of function body to 0.

profile_count arithmetic is indeed supposed to be saturating, it is bad
I managed to miss the check for such a common operation as + :(
> 
> 2024-03-27  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/112303
> 	* profile-count.h (profile_count::operator+): Perform
> 	addition in uint64_t variable and set m_val to MIN of that
> 	val and max_count.
> 	(profile_count::operator+=): Likewise.
> 	(profile_count::operator-=): Formatting fix.

These two changes as OK.

In apply_probability
> @@ -1127,7 +1129,9 @@ public:
>        if (!initialized_p ())
>  	return uninitialized ();
>        profile_count ret;
> -      ret.m_val = RDIV (m_val * prob, REG_BR_PROB_BASE);
> +      uint64_t tmp;
> +      safe_scale_64bit (m_val, prob, REG_BR_PROB_BASE, &tmp);
> +      ret.m_val = MIN (tmp, max_count);

This exists only for old code that still uses the REG_BR_PROB_BASE integer.
Valid prob is always prob <= REG_BR_PROB_BASE :)
So we need safe_scale_64bit to watch overflow, but result does not need
MIN.

>        ret.m_quality = MIN (m_quality, ADJUSTED);
>        return ret;
>      }
> @@ -1145,7 +1149,7 @@ public:
>        uint64_t tmp;
>        safe_scale_64bit (m_val, prob.m_val, profile_probability::max_probability,
>  			&tmp);
> -      ret.m_val = tmp;
> +      ret.m_val = MIN (tmp, max_count);

Same here, it is unnecessary to do MIN.

OK with this change.

Thanks for looking into this,
Honza
diff mbox series

Patch

--- gcc/tree-scalar-evolution.cc
+++ gcc/tree-scalar-evolution.cc
@@ -3881,7 +3881,7 @@  final_value_replacement_loop (class loop *loop)
 
       /* Propagate constants immediately, but leave an unused initialization
         around to avoid invalidating the SCEV cache.  */
-      if (CONSTANT_CLASS_P (def) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
+      if (0 && CONSTANT_CLASS_P (def) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rslt))
        replace_uses_by (rslt, def);
 
       /* Create the replacement statements.  */
(the addition of the above made the ICE latent), because profile_count
addition doesn't check for overflows and if unlucky, we can even overflow
into the uninitialized value.
Getting really huge profile counts is very easy even when not using
recursive inlining in loops, e.g.
__attribute__((noipa)) void
bar (void)
{
  __builtin_exit (0);
}

__attribute__((noipa)) void
foo (void)
{
  for (int i = 0; i < 1000; ++i)
  for (int j = 0; j < 1000; ++j)
  for (int k = 0; k < 1000; ++k)
  for (int l = 0; l < 1000; ++l)
  for (int m = 0; m < 1000; ++m)
  for (int n = 0; n < 1000; ++n)
  for (int o = 0; o < 1000; ++o)
  for (int p = 0; p < 1000; ++p)
  for (int q = 0; q < 1000; ++q)
  for (int r = 0; r < 1000; ++r)
  for (int s = 0; s < 1000; ++s)
  for (int t = 0; t < 1000; ++t)
  for (int u = 0; u < 1000; ++u)
  for (int v = 0; v < 1000; ++v)
  for (int w = 0; w < 1000; ++w)
  for (int x = 0; x < 1000; ++x)
  for (int y = 0; y < 1000; ++y)
  for (int z = 0; z < 1000; ++z)
  for (int a = 0; a < 1000; ++a)
  for (int b = 0; b < 1000; ++b)
    bar ();
}

int
main ()
{
  foo ();
}
reaches the maximum count already on the 11th loop.

Some other methods of profile_count like apply_scale already
do use MIN (val, max_count) before assignment to m_val, this patch
just extends that to other methods.
Furthermore, one overload of apply_probability wasn't using
safe_scale_64bit and so could very easily overflow as well
- prob is required to be [0, 10000] and if m_val is near the max_count,
it can overflow even with multiplications by 8.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

I wonder if we also shouldn't do
if (safe_scale_64bit (..., &tmp)) tmp = max_count;
in the existing as well as new spots, because if safe_scale_64bit returns
true, it just wraps around I think.

2024-03-27  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/112303
	* profile-count.h (profile_count::operator+): Perform
	addition in uint64_t variable and set m_val to MIN of that
	val and max_count.
	(profile_count::operator+=): Likewise.
	(profile_count::operator-=): Formatting fix.
	(profile_count::apply_probability): Use safe_scale_64bit
	even in the int overload.  Set m_val to MIN of tmp and
	max_count.

	* gcc.c-torture/compile/pr112303.c: New test.

--- gcc/profile-count.h.jj	2024-02-22 13:07:22.870344133 +0100
+++ gcc/profile-count.h	2024-03-27 15:16:16.461774110 +0100
@@ -910,7 +910,8 @@  public:
 
       profile_count ret;
       gcc_checking_assert (compatible_p (other));
-      ret.m_val = m_val + other.m_val;
+      uint64_t ret_val = m_val + other.m_val;
+      ret.m_val = MIN (ret_val, max_count);
       ret.m_quality = MIN (m_quality, other.m_quality);
       return ret;
     }
@@ -929,7 +930,8 @@  public:
       else
 	{
           gcc_checking_assert (compatible_p (other));
-	  m_val += other.m_val;
+	  uint64_t ret_val = m_val + other.m_val;
+	  m_val = MIN (ret_val, max_count);
 	  m_quality = MIN (m_quality, other.m_quality);
 	}
       return *this;
@@ -957,7 +959,7 @@  public:
       else
 	{
           gcc_checking_assert (compatible_p (other));
-	  m_val = m_val >= other.m_val ? m_val - other.m_val: 0;
+	  m_val = m_val >= other.m_val ? m_val - other.m_val : 0;
 	  m_quality = MIN (m_quality, other.m_quality);
 	}
       return *this;
@@ -1127,7 +1129,9 @@  public:
       if (!initialized_p ())
 	return uninitialized ();
       profile_count ret;
-      ret.m_val = RDIV (m_val * prob, REG_BR_PROB_BASE);
+      uint64_t tmp;
+      safe_scale_64bit (m_val, prob, REG_BR_PROB_BASE, &tmp);
+      ret.m_val = MIN (tmp, max_count);
       ret.m_quality = MIN (m_quality, ADJUSTED);
       return ret;
     }
@@ -1145,7 +1149,7 @@  public:
       uint64_t tmp;
       safe_scale_64bit (m_val, prob.m_val, profile_probability::max_probability,
 			&tmp);
-      ret.m_val = tmp;
+      ret.m_val = MIN (tmp, max_count);
       ret.m_quality = MIN (m_quality, prob.m_quality);
       return ret;
     }
--- gcc/testsuite/gcc.c-torture/compile/pr112303.c.jj	2024-03-27 15:16:57.873214557 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr112303.c	2024-03-26 12:04:18.741670482 +0100
@@ -0,0 +1,25 @@ 
+/* PR tree-optimization/112303 */
+
+int a, b, d, e, f, **g, h;
+char c;
+
+int *
+foo (void)
+{
+  for (int i = 0; i < 3; i++)
+    {
+      for (h = 0; h < 2; h++)
+	;
+      if (!b)
+	break;
+    }
+  while (f)
+    while (e)
+      {
+	c = 0;
+	while (d)
+	  while (a)
+	    *g = foo ();
+      }
+  return 0;
+}