Message ID | 20171010202338.GB31031@kam.mff.cuni.cz |
---|---|
State | New |
Headers | show |
Series | [RFC] overflow safe scaling of 64bit values | expand |
On Tue, 10 Oct 2017, Jan Hubicka wrote: > Hi, > in order to drop frequencies from basic blocks and counts from edges I need > to make probabilities more precise (so we do not get all those roundoff errors > from 10000-base fixpoint arithmetics). Increasing base is easy now, but it > means that in temporaries one can get overflows easily. > > I need to compute (a*b)/c in a overflow safe way when a*b does not necessarily > need to fit into 64bit integer. The following implement safe_scale_64bit for > it but I am not quite sure if that is most reasonable implementation. > (it uses builtins to detect overflows, inline 64bit computation if it is safe > and 128bit computation if it is not). > > Any ideas for better version? If not I will go ahead with this variant and > increase profile probability base. > > Honza > > * profile-count.h (slow_safe_scale_64bit): New function. > (safe_scale_64bit): New inline. > (profile_count::max_safe_multiplier): Remove; use safe_scale_64bit. > Index: profile-count.h > =================================================================== > --- profile-count.h (revision 253512) > +++ profile-count.h (working copy) > @@ -43,6 +43,35 @@ enum profile_quality { > > #define RDIV(X,Y) (((X) + (Y) / 2) / (Y)) > > +bool slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res); > + > +/* Compute RES=(a*b + c/2)/c capping and return false if overflow happened. */ > + > +inline bool > +safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res) > +{ > +#if (GCC_VERSION >= 5000) > + uint64_t tmp; > + if (!__builtin_mul_overflow (a, b, &tmp) > + && !__builtin_add_overflow (tmp, c/2, &tmp)) > + { > + *res = tmp / c; > + return true; > + } > + if (c == 1) > + { > + *res = (uint64_t) -1; > + return false; > + } > +#else > + if (a < ((uint64_t)1 << 31) > + && b < ((uint64_t)1 << 31) > + && c < ((uint64_t)1 << 31)) I think you can allow (uint64_t)1 << 32, the maximum value you'll get is 0xffffffff then which won't overflow uint64_t. c could be even larger, up to (1 << 34) - 4 I think but I guess that doesn't matter. "safe_scale_64bit" sounds a bit odd but I don't have a better one. Richard. > + return (a * b + (c / 2)) / c; > +#endif > + return slow_safe_scale_64bit (a, b, c, res); > +} > + > /* Data type to hold probabilities. It implements fixed point arithmetics > with capping so probability is always in range [0,1] and scaling requiring > values greater than 1 needs to be represented otherwise. > @@ -87,7 +116,8 @@ class GTY((user)) profile_probability > > static const int n_bits = 30; > static const uint32_t max_probability = REG_BR_PROB_BASE; > - static const uint32_t uninitialized_probability = ((uint32_t) 1 << n_bits) - 1; > + static const uint32_t uninitialized_probability > + = ((uint32_t) 1 << (n_bits - 1)) - 1; > > uint32_t m_val : 30; > enum profile_quality m_quality : 2; > @@ -535,11 +565,6 @@ class GTY(()) profile_count > > uint64_t m_val : n_bits; > enum profile_quality m_quality : 2; > - > - /* Assume numbers smaller than this to multiply. This is set to make > - testsuite pass, in future we may implement precise multiplication in higer > - rangers. */ > - static const uint64_t max_safe_multiplier = 131072; > public: > > /* Used for counters which are expected to be never executed. */ > @@ -790,12 +815,9 @@ public: > return *this; > > profile_count ret; > - /* Take care for overflows! */ > - if (num.m_val < max_safe_multiplier || m_val < max_safe_multiplier) > - ret.m_val = RDIV (m_val * num.m_val, den.m_val); > - else > - ret.m_val = RDIV (m_val * RDIV (num.m_val * max_safe_multiplier, > - den.m_val), max_safe_multiplier); > + uint64_t val; > + safe_scale_64bit (m_val, num.m_val, den.m_val, &val); > + ret.m_val = MIN (val, max_count); > ret.m_quality = MIN (m_quality, profile_adjusted); > return ret; > } > Index: profile-count.c > =================================================================== > --- profile-count.c (revision 253512) > +++ profile-count.c (working copy) > @@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. > #include "gimple.h" > #include "data-streamer.h" > #include "cgraph.h" > +#include "wide-int.h" > > /* Dump THIS to F. */ > > @@ -194,3 +195,21 @@ profile_probability::stream_out (struct > streamer_write_uhwi_stream (ob, m_val); > streamer_write_uhwi_stream (ob, m_quality); > } > + > +/* Compute RES=(a*b + c/2)/c capping and return false if overflow happened. */ > + > +bool > +slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res) > +{ > + FIXED_WIDE_INT (128) tmp = a; > + bool overflow; > + tmp = wi::udiv_floor (wi::umul (tmp, b, &overflow) + (c / 2), c); > + gcc_checking_assert (!overflow); > + if (wi::fits_uhwi_p (tmp)) > + { > + *res = tmp.to_uhwi (); > + return true; > + } > + *res = (uint64_t) -1; > + return false; > +} > >
Index: profile-count.h =================================================================== --- profile-count.h (revision 253512) +++ profile-count.h (working copy) @@ -43,6 +43,35 @@ enum profile_quality { #define RDIV(X,Y) (((X) + (Y) / 2) / (Y)) +bool slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res); + +/* Compute RES=(a*b + c/2)/c capping and return false if overflow happened. */ + +inline bool +safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res) +{ +#if (GCC_VERSION >= 5000) + uint64_t tmp; + if (!__builtin_mul_overflow (a, b, &tmp) + && !__builtin_add_overflow (tmp, c/2, &tmp)) + { + *res = tmp / c; + return true; + } + if (c == 1) + { + *res = (uint64_t) -1; + return false; + } +#else + if (a < ((uint64_t)1 << 31) + && b < ((uint64_t)1 << 31) + && c < ((uint64_t)1 << 31)) + return (a * b + (c / 2)) / c; +#endif + return slow_safe_scale_64bit (a, b, c, res); +} + /* Data type to hold probabilities. It implements fixed point arithmetics with capping so probability is always in range [0,1] and scaling requiring values greater than 1 needs to be represented otherwise. @@ -87,7 +116,8 @@ class GTY((user)) profile_probability static const int n_bits = 30; static const uint32_t max_probability = REG_BR_PROB_BASE; - static const uint32_t uninitialized_probability = ((uint32_t) 1 << n_bits) - 1; + static const uint32_t uninitialized_probability + = ((uint32_t) 1 << (n_bits - 1)) - 1; uint32_t m_val : 30; enum profile_quality m_quality : 2; @@ -535,11 +565,6 @@ class GTY(()) profile_count uint64_t m_val : n_bits; enum profile_quality m_quality : 2; - - /* Assume numbers smaller than this to multiply. This is set to make - testsuite pass, in future we may implement precise multiplication in higer - rangers. */ - static const uint64_t max_safe_multiplier = 131072; public: /* Used for counters which are expected to be never executed. */ @@ -790,12 +815,9 @@ public: return *this; profile_count ret; - /* Take care for overflows! */ - if (num.m_val < max_safe_multiplier || m_val < max_safe_multiplier) - ret.m_val = RDIV (m_val * num.m_val, den.m_val); - else - ret.m_val = RDIV (m_val * RDIV (num.m_val * max_safe_multiplier, - den.m_val), max_safe_multiplier); + uint64_t val; + safe_scale_64bit (m_val, num.m_val, den.m_val, &val); + ret.m_val = MIN (val, max_count); ret.m_quality = MIN (m_quality, profile_adjusted); return ret; } Index: profile-count.c =================================================================== --- profile-count.c (revision 253512) +++ profile-count.c (working copy) @@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. #include "gimple.h" #include "data-streamer.h" #include "cgraph.h" +#include "wide-int.h" /* Dump THIS to F. */ @@ -194,3 +195,21 @@ profile_probability::stream_out (struct streamer_write_uhwi_stream (ob, m_val); streamer_write_uhwi_stream (ob, m_quality); } + +/* Compute RES=(a*b + c/2)/c capping and return false if overflow happened. */ + +bool +slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res) +{ + FIXED_WIDE_INT (128) tmp = a; + bool overflow; + tmp = wi::udiv_floor (wi::umul (tmp, b, &overflow) + (c / 2), c); + gcc_checking_assert (!overflow); + if (wi::fits_uhwi_p (tmp)) + { + *res = tmp.to_uhwi (); + return true; + } + *res = (uint64_t) -1; + return false; +}