diff mbox series

libgcc: Fix up bitint division [PR114397]

Message ID ZfvsFk8ZMIMLpovA@tucnak
State New
Headers show
Series libgcc: Fix up bitint division [PR114397] | expand

Commit Message

Jakub Jelinek March 21, 2024, 8:13 a.m. UTC
Hi!

The Knuth's division algorithm relies on the number of dividend limbs
to be greater ore equal to number of divisor limbs, which is why
I've added a special case for un < vn at the start of __divmodbitint4.
Unfortunately, my assumption that it then implies abs(v) > abs(u) and
so quotient must be 0 and remainder same as dividend is incorrect.
This is because this check is done before negation of the operands.
While bitint_reduce_prec reduces precision from clearly useless limbs,
the problematic case is when the dividend is unsigned or non-negative
and divisor is negative.  We can have limbs (from MS to LS):
dividend:	0	M	?...
divisor:	-1	-N	?...
where M has most significant bit set and M >= N (if M == N then it
also the following limbs matter) and the most significant limbs can
be even partial.  In this case, the quotient should be -1 rather than
0.  bitint_reduce_prec will reduce the precision of the dividend so
that M is the most significant limb, but can't reduce precision of the
divisor to more than having the -1 as most significant limb, because
-N doesn't have the most significant bit set.

The following patch fixes it by detecting this problematic case in the
un < vn handling, and instead of assuming q is 0 and r is u will
decrease vn by 1 because it knows the later code will negate the divisor
and it can be then expressed after negation in one fewer limbs.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-03-21  Jakub Jelinek  <jakub@redhat.com>

	PR libgcc/114397
	* libgcc2.c (__divmodbitint4): Don't assume un < vn always means
	abs(v) > abs(u), check for a special case of un + 1 == vn where
	u is non-negative and v negative and after v's negation vn could
	be reduced by 1.

	* gcc.dg/torture/bitint-65.c: New test.


	Jakub

Comments

Richard Biener March 21, 2024, 8:35 a.m. UTC | #1
On Thu, 21 Mar 2024, Jakub Jelinek wrote:

> Hi!
> 
> The Knuth's division algorithm relies on the number of dividend limbs
> to be greater ore equal to number of divisor limbs, which is why
> I've added a special case for un < vn at the start of __divmodbitint4.
> Unfortunately, my assumption that it then implies abs(v) > abs(u) and
> so quotient must be 0 and remainder same as dividend is incorrect.
> This is because this check is done before negation of the operands.
> While bitint_reduce_prec reduces precision from clearly useless limbs,
> the problematic case is when the dividend is unsigned or non-negative
> and divisor is negative.  We can have limbs (from MS to LS):
> dividend:	0	M	?...
> divisor:	-1	-N	?...
> where M has most significant bit set and M >= N (if M == N then it
> also the following limbs matter) and the most significant limbs can
> be even partial.  In this case, the quotient should be -1 rather than
> 0.  bitint_reduce_prec will reduce the precision of the dividend so
> that M is the most significant limb, but can't reduce precision of the
> divisor to more than having the -1 as most significant limb, because
> -N doesn't have the most significant bit set.
> 
> The following patch fixes it by detecting this problematic case in the
> un < vn handling, and instead of assuming q is 0 and r is u will
> decrease vn by 1 because it knows the later code will negate the divisor
> and it can be then expressed after negation in one fewer limbs.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2024-03-21  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR libgcc/114397
> 	* libgcc2.c (__divmodbitint4): Don't assume un < vn always means
> 	abs(v) > abs(u), check for a special case of un + 1 == vn where
> 	u is non-negative and v negative and after v's negation vn could
> 	be reduced by 1.
> 
> 	* gcc.dg/torture/bitint-65.c: New test.
> 
> --- libgcc/libgcc2.c.jj	2024-03-15 19:04:27.000000000 +0100
> +++ libgcc/libgcc2.c	2024-03-20 18:23:51.956879521 +0100
> @@ -1707,44 +1707,67 @@ __divmodbitint4 (UBILtype *q, SItype qpr
>    USItype vp = avprec % W_TYPE_SIZE;
>    if (__builtin_expect (un < vn, 0))
>      {
> -      /* If abs(v) > abs(u), then q is 0 and r is u.  */
> -      if (q)
> -	__builtin_memset (q, 0, qn * sizeof (UWtype));
> -      if (r == NULL)
> -	return;
> -#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
> -      r += rn - 1;
> -      u += un - 1;
> -#endif
> -      if (up)
> -	--un;
> -      if (rn < un)
> -	un = rn;
> -      for (rn -= un; un; --un)
> +      /* If abs(v) > abs(u), then q is 0 and r is u.
> +	 Unfortunately un < vn doesn't always mean abs(v) > abs(u).
> +	 If uprec > 0 and vprec < 0 and vn == un + 1, if the
> +	 top limb of v is all ones and the second most significant
> +	 limb has most significant bit clear, then just decrease
> +	 vn/avprec/vp and continue, after negation both numbers
> +	 will have the same number of limbs.  */
> +      if (un + 1 == vn
> +	  && uprec >= 0
> +	  && vprec < 0
> +	  && ((v[BITINT_END (0, vn - 1)] | (vp ? ((UWtype) -1 << vp) : 0))
> +	      == (UWtype) -1)
> +	  && (Wtype) v[BITINT_END (1, vn - 2)] >= 0)
>  	{
> -	  *r = *u;
> -	  r += BITINT_INC;
> -	  u += BITINT_INC;
> +	  vp = 0;
> +	  --vn;
> +#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
> +	  ++v;
> +#endif
>  	}
> -      if (!rn)
> -	return;
> -      if (up)
> +      else
>  	{
> -	  if (uprec > 0)
> -	    *r = *u & (((UWtype) 1 << up) - 1);
> -	  else
> -	    *r = *u | ((UWtype) -1 << up);
> -	  r += BITINT_INC;
> -	  if (!--rn)
> +	  /* q is 0 and r is u.  */
> +	  if (q)
> +	    __builtin_memset (q, 0, qn * sizeof (UWtype));
> +	  if (r == NULL)
>  	    return;
> +#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
> +	  r += rn - 1;
> +	  u += un - 1;
> +#endif
> +	  if (up)
> +	    --un;
> +	  if (rn < un)
> +	    un = rn;
> +	  for (rn -= un; un; --un)
> +	    {
> +	      *r = *u;
> +	      r += BITINT_INC;
> +	      u += BITINT_INC;
> +	    }
> +	  if (!rn)
> +	    return;
> +	  if (up)
> +	    {
> +	      if (uprec > 0)
> +		*r = *u & (((UWtype) 1 << up) - 1);
> +	      else
> +		*r = *u | ((UWtype) -1 << up);
> +	      r += BITINT_INC;
> +	      if (!--rn)
> +		return;
> +	    }
> +	  UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0;
> +	  for (; rn; --rn)
> +	    {
> +	      *r = c;
> +	      r += BITINT_INC;
> +	    }
> +	  return;
>  	}
> -      UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0;
> -      for (; rn; --rn)
> -	{
> -	  *r = c;
> -	  r += BITINT_INC;
> -	}
> -      return;
>      }
>    USItype qn2 = un - vn + 1;
>    if (qn >= qn2)
> --- gcc/testsuite/gcc.dg/torture/bitint-65.c.jj	2024-03-20 18:41:38.026311007 +0100
> +++ gcc/testsuite/gcc.dg/torture/bitint-65.c	2024-03-20 18:40:18.604397871 +0100
> @@ -0,0 +1,44 @@
> +/* PR libgcc/114397 */
> +/* { dg-do run { target bitint } } */
> +/* { dg-options "-std=c23" } */
> +/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
> +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
> +
> +#if __BITINT_MAXWIDTH__ >= 129
> +int
> +foo (unsigned _BitInt (128) a, _BitInt (129) b)
> +{
> +  return a / b;
> +}
> +#endif
> +
> +#if __BITINT_MAXWIDTH__ >= 192
> +int
> +bar (unsigned _BitInt (128) a, _BitInt (192) b)
> +{
> +  return a / b;
> +}
> +#endif
> +
> +int
> +main ()
> +{
> +#if __BITINT_MAXWIDTH__ >= 129
> +  if (foo (336225022742818342628768636932743029911uwb,
> +	   -336225022742818342628768636932743029911wb) != -1
> +      || foo (336225022742818342628768636932743029912uwb,
> +	      -336225022742818342628768636932743029911wb) != -1
> +      || foo (336225022742818342628768636932743029911uwb,
> +	      -336225022742818342628768636932743029912wb) != 0)
> +    __builtin_abort ();
> +#endif
> +#if __BITINT_MAXWIDTH__ >= 192
> +  if (bar (336225022742818342628768636932743029911uwb,
> +	   -336225022742818342628768636932743029911wb) != -1
> +      || bar (336225022742818342628768636932743029912uwb,
> +	      -336225022742818342628768636932743029911wb) != -1
> +      || bar (336225022742818342628768636932743029911uwb,
> +	      -336225022742818342628768636932743029912wb) != 0)
> +    __builtin_abort ();
> +#endif
> +}
> 
> 	Jakub
> 
>
diff mbox series

Patch

--- libgcc/libgcc2.c.jj	2024-03-15 19:04:27.000000000 +0100
+++ libgcc/libgcc2.c	2024-03-20 18:23:51.956879521 +0100
@@ -1707,44 +1707,67 @@  __divmodbitint4 (UBILtype *q, SItype qpr
   USItype vp = avprec % W_TYPE_SIZE;
   if (__builtin_expect (un < vn, 0))
     {
-      /* If abs(v) > abs(u), then q is 0 and r is u.  */
-      if (q)
-	__builtin_memset (q, 0, qn * sizeof (UWtype));
-      if (r == NULL)
-	return;
-#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
-      r += rn - 1;
-      u += un - 1;
-#endif
-      if (up)
-	--un;
-      if (rn < un)
-	un = rn;
-      for (rn -= un; un; --un)
+      /* If abs(v) > abs(u), then q is 0 and r is u.
+	 Unfortunately un < vn doesn't always mean abs(v) > abs(u).
+	 If uprec > 0 and vprec < 0 and vn == un + 1, if the
+	 top limb of v is all ones and the second most significant
+	 limb has most significant bit clear, then just decrease
+	 vn/avprec/vp and continue, after negation both numbers
+	 will have the same number of limbs.  */
+      if (un + 1 == vn
+	  && uprec >= 0
+	  && vprec < 0
+	  && ((v[BITINT_END (0, vn - 1)] | (vp ? ((UWtype) -1 << vp) : 0))
+	      == (UWtype) -1)
+	  && (Wtype) v[BITINT_END (1, vn - 2)] >= 0)
 	{
-	  *r = *u;
-	  r += BITINT_INC;
-	  u += BITINT_INC;
+	  vp = 0;
+	  --vn;
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+	  ++v;
+#endif
 	}
-      if (!rn)
-	return;
-      if (up)
+      else
 	{
-	  if (uprec > 0)
-	    *r = *u & (((UWtype) 1 << up) - 1);
-	  else
-	    *r = *u | ((UWtype) -1 << up);
-	  r += BITINT_INC;
-	  if (!--rn)
+	  /* q is 0 and r is u.  */
+	  if (q)
+	    __builtin_memset (q, 0, qn * sizeof (UWtype));
+	  if (r == NULL)
 	    return;
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+	  r += rn - 1;
+	  u += un - 1;
+#endif
+	  if (up)
+	    --un;
+	  if (rn < un)
+	    un = rn;
+	  for (rn -= un; un; --un)
+	    {
+	      *r = *u;
+	      r += BITINT_INC;
+	      u += BITINT_INC;
+	    }
+	  if (!rn)
+	    return;
+	  if (up)
+	    {
+	      if (uprec > 0)
+		*r = *u & (((UWtype) 1 << up) - 1);
+	      else
+		*r = *u | ((UWtype) -1 << up);
+	      r += BITINT_INC;
+	      if (!--rn)
+		return;
+	    }
+	  UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0;
+	  for (; rn; --rn)
+	    {
+	      *r = c;
+	      r += BITINT_INC;
+	    }
+	  return;
 	}
-      UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0;
-      for (; rn; --rn)
-	{
-	  *r = c;
-	  r += BITINT_INC;
-	}
-      return;
     }
   USItype qn2 = un - vn + 1;
   if (qn >= qn2)
--- gcc/testsuite/gcc.dg/torture/bitint-65.c.jj	2024-03-20 18:41:38.026311007 +0100
+++ gcc/testsuite/gcc.dg/torture/bitint-65.c	2024-03-20 18:40:18.604397871 +0100
@@ -0,0 +1,44 @@ 
+/* PR libgcc/114397 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c23" } */
+/* { dg-skip-if "" { ! run_expensive_tests }  { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 129
+int
+foo (unsigned _BitInt (128) a, _BitInt (129) b)
+{
+  return a / b;
+}
+#endif
+
+#if __BITINT_MAXWIDTH__ >= 192
+int
+bar (unsigned _BitInt (128) a, _BitInt (192) b)
+{
+  return a / b;
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 129
+  if (foo (336225022742818342628768636932743029911uwb,
+	   -336225022742818342628768636932743029911wb) != -1
+      || foo (336225022742818342628768636932743029912uwb,
+	      -336225022742818342628768636932743029911wb) != -1
+      || foo (336225022742818342628768636932743029911uwb,
+	      -336225022742818342628768636932743029912wb) != 0)
+    __builtin_abort ();
+#endif
+#if __BITINT_MAXWIDTH__ >= 192
+  if (bar (336225022742818342628768636932743029911uwb,
+	   -336225022742818342628768636932743029911wb) != -1
+      || bar (336225022742818342628768636932743029912uwb,
+	      -336225022742818342628768636932743029911wb) != -1
+      || bar (336225022742818342628768636932743029911uwb,
+	      -336225022742818342628768636932743029912wb) != 0)
+    __builtin_abort ();
+#endif
+}