Message ID | 20220729123211.876374-1-adhemerval.zanella@linaro.org |
---|---|
State | New |
Headers | show |
Series | [v2] stdlib: Simplify arc4random_uniform | expand |
Based on previous comments I will commit this shortly. On 29/07/22 09:32, Adhemerval Zanella wrote: > It uses the bitmask with rejection [1], which calculates a mask > being the lowest power of two bounding the request upper bound, > successively queries new random values, and rejects values > outside the requested range. > > Performance-wise, there is no much gain in trying to conserve > bits since arc4random is wrapper on getrandom syscall. It should > be cheaper to just query a uint32_t value. The algorithm also > avoids modulo and divide operations, which might be costly > depending of the architecture. > > [1] https://www.pcg-random.org/posts/bounded-rands.html > > Reviewed-by: Yann Droneaud <ydroneaud@opteya.com> > --- > v2: Fixed typos in commit message and simplify loop. > --- > stdlib/arc4random_uniform.c | 129 +++++++++--------------------------- > 1 file changed, 30 insertions(+), 99 deletions(-) > > diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c > index 1326dfa593..5aa98d1c13 100644 > --- a/stdlib/arc4random_uniform.c > +++ b/stdlib/arc4random_uniform.c > @@ -17,38 +17,19 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#include <endian.h> > -#include <libc-lock.h> > #include <stdlib.h> > #include <sys/param.h> > > -/* Return the number of bytes which cover values up to the limit. */ > -__attribute__ ((const)) > -static uint32_t > -byte_count (uint32_t n) > -{ > - if (n < (1U << 8)) > - return 1; > - else if (n < (1U << 16)) > - return 2; > - else if (n < (1U << 24)) > - return 3; > - else > - return 4; > -} > +/* Return a uniformly distributed random number less than N. The algorithm > + calculates a mask being the lowest power of two bounding the upper bound > + N, successively queries new random values, and rejects values outside of > + the request range. > > -/* Fill the lower bits of the result with randomness, according to the > - number of bytes requested. */ > -static void > -random_bytes (uint32_t *result, uint32_t byte_count) > -{ > - *result = 0; > - unsigned char *ptr = (unsigned char *) result; > - if (__BYTE_ORDER == __BIG_ENDIAN) > - ptr += 4 - byte_count; > - __arc4random_buf (ptr, byte_count); > -} > + For reject values, it also tries if the remaining entropy could fit on > + the asked range after range adjustment. > > + The algorithm avoids modulo and divide operations, which might be costly > + depending on the architecture. */ > uint32_t > __arc4random_uniform (uint32_t n) > { > @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n) > only possible result for limit 1. */ > return 0; > > - /* The bits variable serves as a source for bits. Prefetch the > - minimum number of bytes needed. */ > - uint32_t count = byte_count (n); > - uint32_t bits_length = count * CHAR_BIT; > - uint32_t bits; > - random_bytes (&bits, count); > - > /* Powers of two are easy. */ > if (powerof2 (n)) > - return bits & (n - 1); > - > - /* The general case. This algorithm follows Jérémie Lumbroso, > - Optimal Discrete Uniform Generation from Coin Flips, and > - Applications (2013), who credits Donald E. Knuth and Andrew > - C. Yao, The complexity of nonuniform random number generation > - (1976), for solving the general case. > + return __arc4random () & (n - 1); > > - The implementation below unrolls the initialization stage of the > - loop, where v is less than n. */ > + /* mask is the smallest power of 2 minus 1 number larger than n. */ > + int z = __builtin_clz (n); > + uint32_t mask = ~UINT32_C(0) >> z; > + int bits = CHAR_BIT * sizeof (uint32_t) - z; > > - /* Use 64-bit variables even though the intermediate results are > - never larger than 33 bits. This ensures the code is easier to > - compile on 64-bit architectures. */ > - uint64_t v; > - uint64_t c; > - > - /* Initialize v and c. v is the smallest power of 2 which is larger > - than n.*/ > - { > - uint32_t log2p1 = 32 - __builtin_clz (n); > - v = 1ULL << log2p1; > - c = bits & (v - 1); > - bits >>= log2p1; > - bits_length -= log2p1; > - } > - > - /* At the start of the loop, c is uniformly distributed within the > - half-open interval [0, v), and v < 2n < 2**33. */ > - while (true) > + while (1) > { > - if (v >= n) > - { > - /* If the candidate is less than n, accept it. */ > - if (c < n) > - /* c is uniformly distributed on [0, n). */ > - return c; > - else > - { > - /* c is uniformly distributed on [n, v). */ > - v -= n; > - c -= n; > - /* The distribution was shifted, so c is uniformly > - distributed on [0, v) again. */ > - } > - } > - /* v < n here. */ > - > - /* Replenish the bit source if necessary. */ > - if (bits_length == 0) > - { > - /* Overwrite the least significant byte. */ > - random_bytes (&bits, 1); > - bits_length = CHAR_BIT; > - } > - > - /* Double the range. No overflow because v < n < 2**32. */ > - v *= 2; > - /* v < 2n here. */ > - > - /* Extract a bit and append it to c. c remains less than v and > - thus 2**33. */ > - c = (c << 1) | (bits & 1); > - bits >>= 1; > - --bits_length; > - > - /* At this point, c is uniformly distributed on [0, v) again, > - and v < 2n < 2**33. */ > + uint32_t value = __arc4random (); > + > + /* Return if the lower power of 2 minus 1 satisfy the condition. */ > + uint32_t r = value & mask; > + if (r < n) > + return r; > + > + /* Otherwise check if remaining bits of entropy provides fits in the > + bound. */ > + for (int bits_left = z; bits_left >= bits; bits_left -= bits) > + { > + value >>= bits; > + r = value & mask; > + if (r < n) > + return r; > + } > } > } > libc_hidden_def (__arc4random_uniform)
On Fri, Jul 29, 2022 at 8:32 PM Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> wrote: > > It uses the bitmask with rejection [1], which calculates a mask > being the lowest power of two bounding the request upper bound, > successively queries new random values, and rejects values > outside the requested range. > > Performance-wise, there is no much gain in trying to conserve > bits since arc4random is wrapper on getrandom syscall. It should > be cheaper to just query a uint32_t value. The algorithm also > avoids modulo and divide operations, which might be costly > depending of the architecture. > > [1] https://www.pcg-random.org/posts/bounded-rands.html > > Reviewed-by: Yann Droneaud <ydroneaud@opteya.com> > --- > v2: Fixed typos in commit message and simplify loop. > --- > stdlib/arc4random_uniform.c | 129 +++++++++--------------------------- > 1 file changed, 30 insertions(+), 99 deletions(-) > > diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c > index 1326dfa593..5aa98d1c13 100644 > --- a/stdlib/arc4random_uniform.c > +++ b/stdlib/arc4random_uniform.c > @@ -17,38 +17,19 @@ > License along with the GNU C Library; if not, see > <https://www.gnu.org/licenses/>. */ > > -#include <endian.h> > -#include <libc-lock.h> > #include <stdlib.h> > #include <sys/param.h> > > -/* Return the number of bytes which cover values up to the limit. */ > -__attribute__ ((const)) > -static uint32_t > -byte_count (uint32_t n) > -{ > - if (n < (1U << 8)) > - return 1; > - else if (n < (1U << 16)) > - return 2; > - else if (n < (1U << 24)) > - return 3; > - else > - return 4; > -} > +/* Return a uniformly distributed random number less than N. The algorithm > + calculates a mask being the lowest power of two bounding the upper bound > + N, successively queries new random values, and rejects values outside of > + the request range. > > -/* Fill the lower bits of the result with randomness, according to the > - number of bytes requested. */ > -static void > -random_bytes (uint32_t *result, uint32_t byte_count) > -{ > - *result = 0; > - unsigned char *ptr = (unsigned char *) result; > - if (__BYTE_ORDER == __BIG_ENDIAN) > - ptr += 4 - byte_count; > - __arc4random_buf (ptr, byte_count); > -} > + For reject values, it also tries if the remaining entropy could fit on > + the asked range after range adjustment. > > + The algorithm avoids modulo and divide operations, which might be costly > + depending on the architecture. */ > uint32_t > __arc4random_uniform (uint32_t n) > { > @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n) > only possible result for limit 1. */ > return 0; > > - /* The bits variable serves as a source for bits. Prefetch the > - minimum number of bytes needed. */ > - uint32_t count = byte_count (n); > - uint32_t bits_length = count * CHAR_BIT; > - uint32_t bits; > - random_bytes (&bits, count); > - > /* Powers of two are easy. */ > if (powerof2 (n)) > - return bits & (n - 1); > - > - /* The general case. This algorithm follows Jérémie Lumbroso, > - Optimal Discrete Uniform Generation from Coin Flips, and > - Applications (2013), who credits Donald E. Knuth and Andrew > - C. Yao, The complexity of nonuniform random number generation > - (1976), for solving the general case. > + return __arc4random () & (n - 1); > > - The implementation below unrolls the initialization stage of the > - loop, where v is less than n. */ > + /* mask is the smallest power of 2 minus 1 number larger than n. */ > + int z = __builtin_clz (n); > + uint32_t mask = ~UINT32_C(0) >> z; > + int bits = CHAR_BIT * sizeof (uint32_t) - z; > > - /* Use 64-bit variables even though the intermediate results are > - never larger than 33 bits. This ensures the code is easier to > - compile on 64-bit architectures. */ > - uint64_t v; > - uint64_t c; > - > - /* Initialize v and c. v is the smallest power of 2 which is larger > - than n.*/ > - { > - uint32_t log2p1 = 32 - __builtin_clz (n); > - v = 1ULL << log2p1; > - c = bits & (v - 1); > - bits >>= log2p1; > - bits_length -= log2p1; > - } > - > - /* At the start of the loop, c is uniformly distributed within the > - half-open interval [0, v), and v < 2n < 2**33. */ > - while (true) > + while (1) > { > - if (v >= n) > - { > - /* If the candidate is less than n, accept it. */ > - if (c < n) > - /* c is uniformly distributed on [0, n). */ > - return c; > - else > - { > - /* c is uniformly distributed on [n, v). */ > - v -= n; > - c -= n; > - /* The distribution was shifted, so c is uniformly > - distributed on [0, v) again. */ > - } > - } > - /* v < n here. */ > - > - /* Replenish the bit source if necessary. */ > - if (bits_length == 0) > - { > - /* Overwrite the least significant byte. */ > - random_bytes (&bits, 1); > - bits_length = CHAR_BIT; > - } > - > - /* Double the range. No overflow because v < n < 2**32. */ > - v *= 2; > - /* v < 2n here. */ > - > - /* Extract a bit and append it to c. c remains less than v and > - thus 2**33. */ > - c = (c << 1) | (bits & 1); > - bits >>= 1; > - --bits_length; > - > - /* At this point, c is uniformly distributed on [0, v) again, > - and v < 2n < 2**33. */ > + uint32_t value = __arc4random (); > + > + /* Return if the lower power of 2 minus 1 satisfy the condition. */ > + uint32_t r = value & mask; > + if (r < n) > + return r; > + > + /* Otherwise check if remaining bits of entropy provides fits in the > + bound. */ > + for (int bits_left = z; bits_left >= bits; bits_left -= bits) > + { > + value >>= bits; Can this just be shift by 1 and repeat (32 - z) times or does that introduce bias (not seeing exactly why it would)? > + r = value & mask; > + if (r < n) > + return r; > + } > } > } > libc_hidden_def (__arc4random_uniform) > -- > 2.34.1 >
Hi, Le 01/08/2022 à 21:20, Noah Goldstein a écrit : >> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c >> index 1326dfa593..5aa98d1c13 100644 >> --- a/stdlib/arc4random_uniform.c >> +++ b/stdlib/arc4random_uniform.c >> >> uint32_t >> __arc4random_uniform (uint32_t n) >> { >> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n) >> + while (1) >> { >> + uint32_t value = __arc4random (); >> + >> + /* Return if the lower power of 2 minus 1 satisfy the condition. */ >> + uint32_t r = value & mask; >> + if (r < n) >> + return r; >> + >> + /* Otherwise check if remaining bits of entropy provides fits in the >> + bound. */ >> + for (int bits_left = z; bits_left >= bits; bits_left -= bits) >> + { >> + value >>= bits; > Can this just be shift by 1 and repeat (32 - z) times or does that > introduce bias (not seeing exactly why it would)? That was bothering me too, as I was thinking a rotation would be possible instead of shift. I posted the question https://crypto.stackexchange.com/questions/101325/uniform-rejection-sampling-by-shifting-or-rotating-bits-from-csprng-output-safe The answer: there's indeed a bias. This explains why my attempt with rotation leads to dieharder complaining. It was so obvious ... Damn Regards.
On 02/08/22 09:08, Yann Droneaud wrote: > Hi, > > Le 01/08/2022 à 21:20, Noah Goldstein a écrit : >>> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c >>> index 1326dfa593..5aa98d1c13 100644 >>> --- a/stdlib/arc4random_uniform.c >>> +++ b/stdlib/arc4random_uniform.c >>> >>> uint32_t >>> __arc4random_uniform (uint32_t n) >>> { >>> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n) >>> + while (1) >>> { >>> + uint32_t value = __arc4random (); >>> + >>> + /* Return if the lower power of 2 minus 1 satisfy the condition. */ >>> + uint32_t r = value & mask; >>> + if (r < n) >>> + return r; >>> + >>> + /* Otherwise check if remaining bits of entropy provides fits in the >>> + bound. */ >>> + for (int bits_left = z; bits_left >= bits; bits_left -= bits) >>> + { >>> + value >>= bits; >> Can this just be shift by 1 and repeat (32 - z) times or does that >> introduce bias (not seeing exactly why it would)? > > > That was bothering me too, as I was thinking a rotation would be possible instead of shift. > > I posted the question https://crypto.stackexchange.com/questions/101325/uniform-rejection-sampling-by-shifting-or-rotating-bits-from-csprng-output-safe > > The answer: there's indeed a bias. > > This explains why my attempt with rotation leads to dieharder complaining. It was so obvious ... Damn > Thanks, I will remove it then. We might evaluate later if using the mask and compare is indeed better than the other methods (as using by OpenBSD).
Le 02/08/2022 à 14:14, Adhemerval Zanella Netto a écrit : > > On 02/08/22 09:08, Yann Droneaud wrote: >> Hi, >> >> Le 01/08/2022 à 21:20, Noah Goldstein a écrit : >>>> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c >>>> index 1326dfa593..5aa98d1c13 100644 >>>> --- a/stdlib/arc4random_uniform.c >>>> +++ b/stdlib/arc4random_uniform.c >>>> >>>> uint32_t >>>> __arc4random_uniform (uint32_t n) >>>> { >>>> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n) >>>> + while (1) >>>> { >>>> + uint32_t value = __arc4random (); >>>> + >>>> + /* Return if the lower power of 2 minus 1 satisfy the condition. */ >>>> + uint32_t r = value & mask; >>>> + if (r < n) >>>> + return r; >>>> + >>>> + /* Otherwise check if remaining bits of entropy provides fits in the >>>> + bound. */ >>>> + for (int bits_left = z; bits_left >= bits; bits_left -= bits) >>>> + { >>>> + value >>= bits; >>> Can this just be shift by 1 and repeat (32 - z) times or does that >>> introduce bias (not seeing exactly why it would)? >> >> That was bothering me too, as I was thinking a rotation would be possible instead of shift. >> >> I posted the question https://crypto.stackexchange.com/questions/101325/uniform-rejection-sampling-by-shifting-or-rotating-bits-from-csprng-output-safe >> >> The answer: there's indeed a bias. >> >> This explains why my attempt with rotation leads to dieharder complaining. It was so obvious ... Damn >> > > Thanks, I will remove it then. We might evaluate later if using the mask > and compare is indeed better than the other methods (as using by OpenBSD). > It's the proposal to shift by 1 bit which would introduce a bias. The implementation used in Glibc is fine. Regards.
On 02/08/22 09:26, Yann Droneaud wrote: > Le 02/08/2022 à 14:14, Adhemerval Zanella Netto a écrit : >> >> On 02/08/22 09:08, Yann Droneaud wrote: >>> Hi, >>> >>> Le 01/08/2022 à 21:20, Noah Goldstein a écrit : >>>>> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c >>>>> index 1326dfa593..5aa98d1c13 100644 >>>>> --- a/stdlib/arc4random_uniform.c >>>>> +++ b/stdlib/arc4random_uniform.c >>>>> >>>>> uint32_t >>>>> __arc4random_uniform (uint32_t n) >>>>> { >>>>> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n) >>>>> + while (1) >>>>> { >>>>> + uint32_t value = __arc4random (); >>>>> + >>>>> + /* Return if the lower power of 2 minus 1 satisfy the condition. */ >>>>> + uint32_t r = value & mask; >>>>> + if (r < n) >>>>> + return r; >>>>> + >>>>> + /* Otherwise check if remaining bits of entropy provides fits in the >>>>> + bound. */ >>>>> + for (int bits_left = z; bits_left >= bits; bits_left -= bits) >>>>> + { >>>>> + value >>= bits; >>>> Can this just be shift by 1 and repeat (32 - z) times or does that >>>> introduce bias (not seeing exactly why it would)? >>> >>> That was bothering me too, as I was thinking a rotation would be possible instead of shift. >>> >>> I posted the question https://crypto.stackexchange.com/questions/101325/uniform-rejection-sampling-by-shifting-or-rotating-bits-from-csprng-output-safe >>> >>> The answer: there's indeed a bias. >>> >>> This explains why my attempt with rotation leads to dieharder complaining. It was so obvious ... Damn >>> >> >> Thanks, I will remove it then. We might evaluate later if using the mask >> and compare is indeed better than the other methods (as using by OpenBSD). >> > It's the proposal to shift by 1 bit which would introduce a bias. > > The implementation used in Glibc is fine. Hum I understood wrongly then, since it seemed you are answering to Noah question for glibc implementation itself. I though I has this sort out, but I am not sure anymore.
On Tue, Aug 2, 2022 at 8:08 PM Yann Droneaud <ydroneaud@opteya.com> wrote: > > Hi, > > Le 01/08/2022 à 21:20, Noah Goldstein a écrit : > >> diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c > >> index 1326dfa593..5aa98d1c13 100644 > >> --- a/stdlib/arc4random_uniform.c > >> +++ b/stdlib/arc4random_uniform.c > >> > >> uint32_t > >> __arc4random_uniform (uint32_t n) > >> { > >> @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n) > >> + while (1) > >> { > >> + uint32_t value = __arc4random (); > >> + > >> + /* Return if the lower power of 2 minus 1 satisfy the condition. */ > >> + uint32_t r = value & mask; > >> + if (r < n) > >> + return r; > >> + > >> + /* Otherwise check if remaining bits of entropy provides fits in the > >> + bound. */ > >> + for (int bits_left = z; bits_left >= bits; bits_left -= bits) > >> + { > >> + value >>= bits; > > Can this just be shift by 1 and repeat (32 - z) times or does that > > introduce bias (not seeing exactly why it would)? > > > That was bothering me too, as I was thinking a rotation would be > possible instead of shift. > > I posted the question > https://crypto.stackexchange.com/questions/101325/uniform-rejection-sampling-by-shifting-or-rotating-bits-from-csprng-output-safe > > The answer: there's indeed a bias. Hmm, based on the answer it seems we could shift by `popcnt(n)` (which is guranteed to be <= bits). If I understand correctly the issue is the consecutive leading 1s essentially fix ensuing ones but `popcnt(n)` will always clear those fixed ones. After the first zero, the rest of the bits can be anything I believe so there will be no bias from them. > > This explains why my attempt with rotation leads to dieharder > complaining. It was so obvious ... Damn > > > Regards. > > > -- > > Yann Droneaud > > OPTEYA > >
diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c index 1326dfa593..5aa98d1c13 100644 --- a/stdlib/arc4random_uniform.c +++ b/stdlib/arc4random_uniform.c @@ -17,38 +17,19 @@ License along with the GNU C Library; if not, see <https://www.gnu.org/licenses/>. */ -#include <endian.h> -#include <libc-lock.h> #include <stdlib.h> #include <sys/param.h> -/* Return the number of bytes which cover values up to the limit. */ -__attribute__ ((const)) -static uint32_t -byte_count (uint32_t n) -{ - if (n < (1U << 8)) - return 1; - else if (n < (1U << 16)) - return 2; - else if (n < (1U << 24)) - return 3; - else - return 4; -} +/* Return a uniformly distributed random number less than N. The algorithm + calculates a mask being the lowest power of two bounding the upper bound + N, successively queries new random values, and rejects values outside of + the request range. -/* Fill the lower bits of the result with randomness, according to the - number of bytes requested. */ -static void -random_bytes (uint32_t *result, uint32_t byte_count) -{ - *result = 0; - unsigned char *ptr = (unsigned char *) result; - if (__BYTE_ORDER == __BIG_ENDIAN) - ptr += 4 - byte_count; - __arc4random_buf (ptr, byte_count); -} + For reject values, it also tries if the remaining entropy could fit on + the asked range after range adjustment. + The algorithm avoids modulo and divide operations, which might be costly + depending on the architecture. */ uint32_t __arc4random_uniform (uint32_t n) { @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n) only possible result for limit 1. */ return 0; - /* The bits variable serves as a source for bits. Prefetch the - minimum number of bytes needed. */ - uint32_t count = byte_count (n); - uint32_t bits_length = count * CHAR_BIT; - uint32_t bits; - random_bytes (&bits, count); - /* Powers of two are easy. */ if (powerof2 (n)) - return bits & (n - 1); - - /* The general case. This algorithm follows Jérémie Lumbroso, - Optimal Discrete Uniform Generation from Coin Flips, and - Applications (2013), who credits Donald E. Knuth and Andrew - C. Yao, The complexity of nonuniform random number generation - (1976), for solving the general case. + return __arc4random () & (n - 1); - The implementation below unrolls the initialization stage of the - loop, where v is less than n. */ + /* mask is the smallest power of 2 minus 1 number larger than n. */ + int z = __builtin_clz (n); + uint32_t mask = ~UINT32_C(0) >> z; + int bits = CHAR_BIT * sizeof (uint32_t) - z; - /* Use 64-bit variables even though the intermediate results are - never larger than 33 bits. This ensures the code is easier to - compile on 64-bit architectures. */ - uint64_t v; - uint64_t c; - - /* Initialize v and c. v is the smallest power of 2 which is larger - than n.*/ - { - uint32_t log2p1 = 32 - __builtin_clz (n); - v = 1ULL << log2p1; - c = bits & (v - 1); - bits >>= log2p1; - bits_length -= log2p1; - } - - /* At the start of the loop, c is uniformly distributed within the - half-open interval [0, v), and v < 2n < 2**33. */ - while (true) + while (1) { - if (v >= n) - { - /* If the candidate is less than n, accept it. */ - if (c < n) - /* c is uniformly distributed on [0, n). */ - return c; - else - { - /* c is uniformly distributed on [n, v). */ - v -= n; - c -= n; - /* The distribution was shifted, so c is uniformly - distributed on [0, v) again. */ - } - } - /* v < n here. */ - - /* Replenish the bit source if necessary. */ - if (bits_length == 0) - { - /* Overwrite the least significant byte. */ - random_bytes (&bits, 1); - bits_length = CHAR_BIT; - } - - /* Double the range. No overflow because v < n < 2**32. */ - v *= 2; - /* v < 2n here. */ - - /* Extract a bit and append it to c. c remains less than v and - thus 2**33. */ - c = (c << 1) | (bits & 1); - bits >>= 1; - --bits_length; - - /* At this point, c is uniformly distributed on [0, v) again, - and v < 2n < 2**33. */ + uint32_t value = __arc4random (); + + /* Return if the lower power of 2 minus 1 satisfy the condition. */ + uint32_t r = value & mask; + if (r < n) + return r; + + /* Otherwise check if remaining bits of entropy provides fits in the + bound. */ + for (int bits_left = z; bits_left >= bits; bits_left -= bits) + { + value >>= bits; + r = value & mask; + if (r < n) + return r; + } } } libc_hidden_def (__arc4random_uniform)
It uses the bitmask with rejection [1], which calculates a mask being the lowest power of two bounding the request upper bound, successively queries new random values, and rejects values outside the requested range. Performance-wise, there is no much gain in trying to conserve bits since arc4random is wrapper on getrandom syscall. It should be cheaper to just query a uint32_t value. The algorithm also avoids modulo and divide operations, which might be costly depending of the architecture. [1] https://www.pcg-random.org/posts/bounded-rands.html Reviewed-by: Yann Droneaud <ydroneaud@opteya.com> --- v2: Fixed typos in commit message and simplify loop. --- stdlib/arc4random_uniform.c | 129 +++++++++--------------------------- 1 file changed, 30 insertions(+), 99 deletions(-)