Message ID | ZfvsFk8ZMIMLpovA@tucnak |
---|---|
State | New |
Headers | show |
Series | libgcc: Fix up bitint division [PR114397] | expand |
On Thu, 21 Mar 2024, Jakub Jelinek wrote: > Hi! > > The Knuth's division algorithm relies on the number of dividend limbs > to be greater ore equal to number of divisor limbs, which is why > I've added a special case for un < vn at the start of __divmodbitint4. > Unfortunately, my assumption that it then implies abs(v) > abs(u) and > so quotient must be 0 and remainder same as dividend is incorrect. > This is because this check is done before negation of the operands. > While bitint_reduce_prec reduces precision from clearly useless limbs, > the problematic case is when the dividend is unsigned or non-negative > and divisor is negative. We can have limbs (from MS to LS): > dividend: 0 M ?... > divisor: -1 -N ?... > where M has most significant bit set and M >= N (if M == N then it > also the following limbs matter) and the most significant limbs can > be even partial. In this case, the quotient should be -1 rather than > 0. bitint_reduce_prec will reduce the precision of the dividend so > that M is the most significant limb, but can't reduce precision of the > divisor to more than having the -1 as most significant limb, because > -N doesn't have the most significant bit set. > > The following patch fixes it by detecting this problematic case in the > un < vn handling, and instead of assuming q is 0 and r is u will > decrease vn by 1 because it knows the later code will negate the divisor > and it can be then expressed after negation in one fewer limbs. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK. Richard. > 2024-03-21 Jakub Jelinek <jakub@redhat.com> > > PR libgcc/114397 > * libgcc2.c (__divmodbitint4): Don't assume un < vn always means > abs(v) > abs(u), check for a special case of un + 1 == vn where > u is non-negative and v negative and after v's negation vn could > be reduced by 1. > > * gcc.dg/torture/bitint-65.c: New test. > > --- libgcc/libgcc2.c.jj 2024-03-15 19:04:27.000000000 +0100 > +++ libgcc/libgcc2.c 2024-03-20 18:23:51.956879521 +0100 > @@ -1707,44 +1707,67 @@ __divmodbitint4 (UBILtype *q, SItype qpr > USItype vp = avprec % W_TYPE_SIZE; > if (__builtin_expect (un < vn, 0)) > { > - /* If abs(v) > abs(u), then q is 0 and r is u. */ > - if (q) > - __builtin_memset (q, 0, qn * sizeof (UWtype)); > - if (r == NULL) > - return; > -#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__ > - r += rn - 1; > - u += un - 1; > -#endif > - if (up) > - --un; > - if (rn < un) > - un = rn; > - for (rn -= un; un; --un) > + /* If abs(v) > abs(u), then q is 0 and r is u. > + Unfortunately un < vn doesn't always mean abs(v) > abs(u). > + If uprec > 0 and vprec < 0 and vn == un + 1, if the > + top limb of v is all ones and the second most significant > + limb has most significant bit clear, then just decrease > + vn/avprec/vp and continue, after negation both numbers > + will have the same number of limbs. */ > + if (un + 1 == vn > + && uprec >= 0 > + && vprec < 0 > + && ((v[BITINT_END (0, vn - 1)] | (vp ? ((UWtype) -1 << vp) : 0)) > + == (UWtype) -1) > + && (Wtype) v[BITINT_END (1, vn - 2)] >= 0) > { > - *r = *u; > - r += BITINT_INC; > - u += BITINT_INC; > + vp = 0; > + --vn; > +#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__ > + ++v; > +#endif > } > - if (!rn) > - return; > - if (up) > + else > { > - if (uprec > 0) > - *r = *u & (((UWtype) 1 << up) - 1); > - else > - *r = *u | ((UWtype) -1 << up); > - r += BITINT_INC; > - if (!--rn) > + /* q is 0 and r is u. */ > + if (q) > + __builtin_memset (q, 0, qn * sizeof (UWtype)); > + if (r == NULL) > return; > +#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__ > + r += rn - 1; > + u += un - 1; > +#endif > + if (up) > + --un; > + if (rn < un) > + un = rn; > + for (rn -= un; un; --un) > + { > + *r = *u; > + r += BITINT_INC; > + u += BITINT_INC; > + } > + if (!rn) > + return; > + if (up) > + { > + if (uprec > 0) > + *r = *u & (((UWtype) 1 << up) - 1); > + else > + *r = *u | ((UWtype) -1 << up); > + r += BITINT_INC; > + if (!--rn) > + return; > + } > + UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0; > + for (; rn; --rn) > + { > + *r = c; > + r += BITINT_INC; > + } > + return; > } > - UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0; > - for (; rn; --rn) > - { > - *r = c; > - r += BITINT_INC; > - } > - return; > } > USItype qn2 = un - vn + 1; > if (qn >= qn2) > --- gcc/testsuite/gcc.dg/torture/bitint-65.c.jj 2024-03-20 18:41:38.026311007 +0100 > +++ gcc/testsuite/gcc.dg/torture/bitint-65.c 2024-03-20 18:40:18.604397871 +0100 > @@ -0,0 +1,44 @@ > +/* PR libgcc/114397 */ > +/* { dg-do run { target bitint } } */ > +/* { dg-options "-std=c23" } */ > +/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */ > +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */ > + > +#if __BITINT_MAXWIDTH__ >= 129 > +int > +foo (unsigned _BitInt (128) a, _BitInt (129) b) > +{ > + return a / b; > +} > +#endif > + > +#if __BITINT_MAXWIDTH__ >= 192 > +int > +bar (unsigned _BitInt (128) a, _BitInt (192) b) > +{ > + return a / b; > +} > +#endif > + > +int > +main () > +{ > +#if __BITINT_MAXWIDTH__ >= 129 > + if (foo (336225022742818342628768636932743029911uwb, > + -336225022742818342628768636932743029911wb) != -1 > + || foo (336225022742818342628768636932743029912uwb, > + -336225022742818342628768636932743029911wb) != -1 > + || foo (336225022742818342628768636932743029911uwb, > + -336225022742818342628768636932743029912wb) != 0) > + __builtin_abort (); > +#endif > +#if __BITINT_MAXWIDTH__ >= 192 > + if (bar (336225022742818342628768636932743029911uwb, > + -336225022742818342628768636932743029911wb) != -1 > + || bar (336225022742818342628768636932743029912uwb, > + -336225022742818342628768636932743029911wb) != -1 > + || bar (336225022742818342628768636932743029911uwb, > + -336225022742818342628768636932743029912wb) != 0) > + __builtin_abort (); > +#endif > +} > > Jakub > >
--- libgcc/libgcc2.c.jj 2024-03-15 19:04:27.000000000 +0100 +++ libgcc/libgcc2.c 2024-03-20 18:23:51.956879521 +0100 @@ -1707,44 +1707,67 @@ __divmodbitint4 (UBILtype *q, SItype qpr USItype vp = avprec % W_TYPE_SIZE; if (__builtin_expect (un < vn, 0)) { - /* If abs(v) > abs(u), then q is 0 and r is u. */ - if (q) - __builtin_memset (q, 0, qn * sizeof (UWtype)); - if (r == NULL) - return; -#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__ - r += rn - 1; - u += un - 1; -#endif - if (up) - --un; - if (rn < un) - un = rn; - for (rn -= un; un; --un) + /* If abs(v) > abs(u), then q is 0 and r is u. + Unfortunately un < vn doesn't always mean abs(v) > abs(u). + If uprec > 0 and vprec < 0 and vn == un + 1, if the + top limb of v is all ones and the second most significant + limb has most significant bit clear, then just decrease + vn/avprec/vp and continue, after negation both numbers + will have the same number of limbs. */ + if (un + 1 == vn + && uprec >= 0 + && vprec < 0 + && ((v[BITINT_END (0, vn - 1)] | (vp ? ((UWtype) -1 << vp) : 0)) + == (UWtype) -1) + && (Wtype) v[BITINT_END (1, vn - 2)] >= 0) { - *r = *u; - r += BITINT_INC; - u += BITINT_INC; + vp = 0; + --vn; +#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__ + ++v; +#endif } - if (!rn) - return; - if (up) + else { - if (uprec > 0) - *r = *u & (((UWtype) 1 << up) - 1); - else - *r = *u | ((UWtype) -1 << up); - r += BITINT_INC; - if (!--rn) + /* q is 0 and r is u. */ + if (q) + __builtin_memset (q, 0, qn * sizeof (UWtype)); + if (r == NULL) return; +#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__ + r += rn - 1; + u += un - 1; +#endif + if (up) + --un; + if (rn < un) + un = rn; + for (rn -= un; un; --un) + { + *r = *u; + r += BITINT_INC; + u += BITINT_INC; + } + if (!rn) + return; + if (up) + { + if (uprec > 0) + *r = *u & (((UWtype) 1 << up) - 1); + else + *r = *u | ((UWtype) -1 << up); + r += BITINT_INC; + if (!--rn) + return; + } + UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0; + for (; rn; --rn) + { + *r = c; + r += BITINT_INC; + } + return; } - UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0; - for (; rn; --rn) - { - *r = c; - r += BITINT_INC; - } - return; } USItype qn2 = un - vn + 1; if (qn >= qn2) --- gcc/testsuite/gcc.dg/torture/bitint-65.c.jj 2024-03-20 18:41:38.026311007 +0100 +++ gcc/testsuite/gcc.dg/torture/bitint-65.c 2024-03-20 18:40:18.604397871 +0100 @@ -0,0 +1,44 @@ +/* PR libgcc/114397 */ +/* { dg-do run { target bitint } } */ +/* { dg-options "-std=c23" } */ +/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */ +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */ + +#if __BITINT_MAXWIDTH__ >= 129 +int +foo (unsigned _BitInt (128) a, _BitInt (129) b) +{ + return a / b; +} +#endif + +#if __BITINT_MAXWIDTH__ >= 192 +int +bar (unsigned _BitInt (128) a, _BitInt (192) b) +{ + return a / b; +} +#endif + +int +main () +{ +#if __BITINT_MAXWIDTH__ >= 129 + if (foo (336225022742818342628768636932743029911uwb, + -336225022742818342628768636932743029911wb) != -1 + || foo (336225022742818342628768636932743029912uwb, + -336225022742818342628768636932743029911wb) != -1 + || foo (336225022742818342628768636932743029911uwb, + -336225022742818342628768636932743029912wb) != 0) + __builtin_abort (); +#endif +#if __BITINT_MAXWIDTH__ >= 192 + if (bar (336225022742818342628768636932743029911uwb, + -336225022742818342628768636932743029911wb) != -1 + || bar (336225022742818342628768636932743029912uwb, + -336225022742818342628768636932743029911wb) != -1 + || bar (336225022742818342628768636932743029911uwb, + -336225022742818342628768636932743029912wb) != 0) + __builtin_abort (); +#endif +}