[RFC] overflow safe scaling of 64bit values

Message ID 20171010202338.GB31031@kam.mff.cuni.cz
State New
Headers show
Series
  • [RFC] overflow safe scaling of 64bit values
Related show

Commit Message

Jan Hubicka Oct. 10, 2017, 8:23 p.m.
Hi,
in order to drop frequencies from basic blocks and counts from edges I need
to make probabilities more precise (so we do not get all those roundoff errors
from 10000-base fixpoint arithmetics).  Increasing base is easy now, but it
means that in temporaries one can get overflows easily.

I need to compute (a*b)/c in a overflow safe way when a*b does not necessarily
need to fit into 64bit integer.  The following implement safe_scale_64bit for
it but I am not quite sure if that is most reasonable implementation.
(it uses builtins to detect overflows, inline 64bit computation if it is safe
and 128bit computation if it is not).

Any ideas for better version? If not I will go ahead with this variant and
increase profile probability base.

Honza

	* profile-count.h (slow_safe_scale_64bit): New function.
	(safe_scale_64bit): New inline.
	(profile_count::max_safe_multiplier): Remove; use safe_scale_64bit.

Comments

Richard Biener Oct. 11, 2017, 10:21 a.m. | #1
On Tue, 10 Oct 2017, Jan Hubicka wrote:

> Hi,
> in order to drop frequencies from basic blocks and counts from edges I need
> to make probabilities more precise (so we do not get all those roundoff errors
> from 10000-base fixpoint arithmetics).  Increasing base is easy now, but it
> means that in temporaries one can get overflows easily.
> 
> I need to compute (a*b)/c in a overflow safe way when a*b does not necessarily
> need to fit into 64bit integer.  The following implement safe_scale_64bit for
> it but I am not quite sure if that is most reasonable implementation.
> (it uses builtins to detect overflows, inline 64bit computation if it is safe
> and 128bit computation if it is not).
> 
> Any ideas for better version? If not I will go ahead with this variant and
> increase profile probability base.
> 
> Honza
> 
> 	* profile-count.h (slow_safe_scale_64bit): New function.
> 	(safe_scale_64bit): New inline.
> 	(profile_count::max_safe_multiplier): Remove; use safe_scale_64bit.
> Index: profile-count.h
> ===================================================================
> --- profile-count.h	(revision 253512)
> +++ profile-count.h	(working copy)
> @@ -43,6 +43,35 @@ enum profile_quality {
>  
>  #define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
>  
> +bool slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res);
> +
> +/* Compute RES=(a*b + c/2)/c capping and return false if overflow happened.  */
> +
> +inline bool
> +safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res)
> +{
> +#if (GCC_VERSION >= 5000)
> +  uint64_t tmp;
> +  if (!__builtin_mul_overflow (a, b, &tmp)
> +      && !__builtin_add_overflow (tmp, c/2, &tmp))
> +    {
> +      *res = tmp / c;
> +      return true;
> +    }
> +  if (c == 1)
> +    {
> +      *res = (uint64_t) -1;
> +      return false;
> +    }
> +#else
> +  if (a < ((uint64_t)1 << 31)
> +      && b < ((uint64_t)1 << 31)
> +      && c < ((uint64_t)1 << 31))

I think you can allow (uint64_t)1 << 32, the maximum value you'll get
is 0xffffffff then which won't overflow uint64_t.  c could be even
larger, up to (1 << 34) - 4 I think but I guess that doesn't matter.

"safe_scale_64bit" sounds a bit odd but I don't have a better one.

Richard.

> +    return (a * b + (c / 2)) / c;
> +#endif
> +  return slow_safe_scale_64bit (a, b, c, res);
> +}
> +
>  /* Data type to hold probabilities.  It implements fixed point arithmetics
>     with capping so probability is always in range [0,1] and scaling requiring
>     values greater than 1 needs to be represented otherwise.
> @@ -87,7 +116,8 @@ class GTY((user)) profile_probability
>  
>    static const int n_bits = 30;
>    static const uint32_t max_probability = REG_BR_PROB_BASE;
> -  static const uint32_t uninitialized_probability = ((uint32_t) 1 << n_bits) - 1;
> +  static const uint32_t uninitialized_probability
> +		 = ((uint32_t) 1 << (n_bits - 1)) - 1;
>  
>    uint32_t m_val : 30;
>    enum profile_quality m_quality : 2;
> @@ -535,11 +565,6 @@ class GTY(()) profile_count
>  
>    uint64_t m_val : n_bits;
>    enum profile_quality m_quality : 2;
> -
> -  /* Assume numbers smaller than this to multiply.  This is set to make
> -     testsuite pass, in future we may implement precise multiplication in higer
> -     rangers.  */
> -  static const uint64_t max_safe_multiplier = 131072;
>  public:
>  
>    /* Used for counters which are expected to be never executed.  */
> @@ -790,12 +815,9 @@ public:
>  	return *this;
>  
>        profile_count ret;
> -      /* Take care for overflows!  */
> -      if (num.m_val < max_safe_multiplier || m_val < max_safe_multiplier)
> -	ret.m_val = RDIV (m_val * num.m_val, den.m_val);
> -      else
> -	ret.m_val = RDIV (m_val * RDIV (num.m_val * max_safe_multiplier,
> -					den.m_val), max_safe_multiplier);
> +      uint64_t val;
> +      safe_scale_64bit (m_val, num.m_val, den.m_val, &val);
> +      ret.m_val = MIN (val, max_count);
>        ret.m_quality = MIN (m_quality, profile_adjusted);
>        return ret;
>      }
> Index: profile-count.c
> ===================================================================
> --- profile-count.c	(revision 253512)
> +++ profile-count.c	(working copy)
> @@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.
>  #include "gimple.h"
>  #include "data-streamer.h"
>  #include "cgraph.h"
> +#include "wide-int.h"
>  
>  /* Dump THIS to F.  */
>  
> @@ -194,3 +195,21 @@ profile_probability::stream_out (struct
>    streamer_write_uhwi_stream (ob, m_val);
>    streamer_write_uhwi_stream (ob, m_quality);
>  }
> +
> +/* Compute RES=(a*b + c/2)/c capping and return false if overflow happened.  */
> +
> +bool
> +slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res)
> +{
> +  FIXED_WIDE_INT (128) tmp = a;
> +  bool overflow;
> +  tmp = wi::udiv_floor (wi::umul (tmp, b, &overflow) + (c / 2), c);
> +  gcc_checking_assert (!overflow);
> +  if (wi::fits_uhwi_p (tmp))
> +    {
> +      *res = tmp.to_uhwi ();
> +      return true;
> +    }
> +  *res = (uint64_t) -1;
> +  return false;
> +}
> 
>

Patch

Index: profile-count.h
===================================================================
--- profile-count.h	(revision 253512)
+++ profile-count.h	(working copy)
@@ -43,6 +43,35 @@  enum profile_quality {
 
 #define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
 
+bool slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res);
+
+/* Compute RES=(a*b + c/2)/c capping and return false if overflow happened.  */
+
+inline bool
+safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res)
+{
+#if (GCC_VERSION >= 5000)
+  uint64_t tmp;
+  if (!__builtin_mul_overflow (a, b, &tmp)
+      && !__builtin_add_overflow (tmp, c/2, &tmp))
+    {
+      *res = tmp / c;
+      return true;
+    }
+  if (c == 1)
+    {
+      *res = (uint64_t) -1;
+      return false;
+    }
+#else
+  if (a < ((uint64_t)1 << 31)
+      && b < ((uint64_t)1 << 31)
+      && c < ((uint64_t)1 << 31))
+    return (a * b + (c / 2)) / c;
+#endif
+  return slow_safe_scale_64bit (a, b, c, res);
+}
+
 /* Data type to hold probabilities.  It implements fixed point arithmetics
    with capping so probability is always in range [0,1] and scaling requiring
    values greater than 1 needs to be represented otherwise.
@@ -87,7 +116,8 @@  class GTY((user)) profile_probability
 
   static const int n_bits = 30;
   static const uint32_t max_probability = REG_BR_PROB_BASE;
-  static const uint32_t uninitialized_probability = ((uint32_t) 1 << n_bits) - 1;
+  static const uint32_t uninitialized_probability
+		 = ((uint32_t) 1 << (n_bits - 1)) - 1;
 
   uint32_t m_val : 30;
   enum profile_quality m_quality : 2;
@@ -535,11 +565,6 @@  class GTY(()) profile_count
 
   uint64_t m_val : n_bits;
   enum profile_quality m_quality : 2;
-
-  /* Assume numbers smaller than this to multiply.  This is set to make
-     testsuite pass, in future we may implement precise multiplication in higer
-     rangers.  */
-  static const uint64_t max_safe_multiplier = 131072;
 public:
 
   /* Used for counters which are expected to be never executed.  */
@@ -790,12 +815,9 @@  public:
 	return *this;
 
       profile_count ret;
-      /* Take care for overflows!  */
-      if (num.m_val < max_safe_multiplier || m_val < max_safe_multiplier)
-	ret.m_val = RDIV (m_val * num.m_val, den.m_val);
-      else
-	ret.m_val = RDIV (m_val * RDIV (num.m_val * max_safe_multiplier,
-					den.m_val), max_safe_multiplier);
+      uint64_t val;
+      safe_scale_64bit (m_val, num.m_val, den.m_val, &val);
+      ret.m_val = MIN (val, max_count);
       ret.m_quality = MIN (m_quality, profile_adjusted);
       return ret;
     }
Index: profile-count.c
===================================================================
--- profile-count.c	(revision 253512)
+++ profile-count.c	(working copy)
@@ -30,6 +30,7 @@  along with GCC; see the file COPYING3.
 #include "gimple.h"
 #include "data-streamer.h"
 #include "cgraph.h"
+#include "wide-int.h"
 
 /* Dump THIS to F.  */
 
@@ -194,3 +195,21 @@  profile_probability::stream_out (struct
   streamer_write_uhwi_stream (ob, m_val);
   streamer_write_uhwi_stream (ob, m_quality);
 }
+
+/* Compute RES=(a*b + c/2)/c capping and return false if overflow happened.  */
+
+bool
+slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res)
+{
+  FIXED_WIDE_INT (128) tmp = a;
+  bool overflow;
+  tmp = wi::udiv_floor (wi::umul (tmp, b, &overflow) + (c / 2), c);
+  gcc_checking_assert (!overflow);
+  if (wi::fits_uhwi_p (tmp))
+    {
+      *res = tmp.to_uhwi ();
+      return true;
+    }
+  *res = (uint64_t) -1;
+  return false;
+}