diff mbox

[RFC,LIBGCC,1,of,2] 64 bit divide implementation for processor without hw divide instruction

Message ID 529009F5.10208@linaro.org
State New
Headers show

Commit Message

Kugan Vivekanandarajah Nov. 23, 2013, 1:50 a.m. UTC
Hi All,

This RFC patch series implements a simple align divisor shift dividend
method.

Regression tested on arm-none-linux-gnueabi with no issues.

OK?

Thanks,
Kugan

+2013-11-22  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	* libgcc/libgcc2.c (__udivmoddi4): Define new implementation when
+	HAVE_NO_HW_DIVIDE is defined, for processors without any divide
+	instructions.
+

Comments

Ian Lance Taylor Nov. 23, 2013, 3:14 p.m. UTC | #1
Kugan <kugan.vivekanandarajah@linaro.org> writes:

> This RFC patch series implements a simple align divisor shift dividend
> method.
>
> Regression tested on arm-none-linux-gnueabi with no issues.
>
> OK?
>
> Thanks,
> Kugan
>
> +2013-11-22  Kugan Vivekanandarajah  <kuganv@linaro.org>
> +
> +	* libgcc/libgcc2.c (__udivmoddi4): Define new implementation when
> +	HAVE_NO_HW_DIVIDE is defined, for processors without any divide
> +     instructions.


The code looks fine to me.

You should document HAVE_NO_HW_DIVIDE in gcc/doc/tm.texi in the Library
Calls section.  The macro should probably be something like
TARGET_HAS_NO_HW_DIVIDE.

Ian
diff mbox

Patch

diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c
index bec411b..a1d3fbc 100644
--- a/libgcc/libgcc2.c
+++ b/libgcc/libgcc2.c
@@ -934,6 +934,74 @@  __parityDI2 (UDWtype x)
 #endif
 
 #ifdef L_udivmoddi4
+#ifdef HAVE_NO_HW_DIVIDE
+
+#if (defined (L_udivdi3) || defined (L_divdi3) || \
+     defined (L_umoddi3) || defined (L_moddi3))
+static inline __attribute__ ((__always_inline__))
+#endif
+UDWtype
+__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
+{
+  UDWtype q = 0, r = n, y = d;
+  UWtype lz1, lz2, i, k;
+
+  /* Compute n / d and, if rp is non-null, store n % d in *rp, using the
+     align-divisor, shift-dividend method: align the divisor under the
+     dividend, then perform k + 1 test-subtract iterations, where k is
+     the number of bit positions the divisor must be shifted left to
+     align it.  Each regular iteration shifts the dividend left, and the
+     quotient bits are saved in the rightmost positions of the dividend
+     as it shifts.  */
+
+  if (y <= r)
+    {
+      lz1 = __builtin_clzll (d);
+      lz2 = __builtin_clzll (n);
+
+      k = lz1 - lz2;
+      y = (y << k);
+
+      /* The dividend can exceed 2 ^ (width - 1) - 1 but still be less than
+	 the aligned divisor, so a normal iteration could drop the high-order
+	 bit of the dividend.  Therefore, the first test-subtract iteration
+	 is a special case: it saves its quotient bit in a separate location
+	 and does not shift the dividend.  */
+      if (r >= y)
+	{
+	  r = r - y;
+	  q =  (1ULL << k);
+	}
+
+      if (k > 0)
+	{
+	  y = y >> 1;
+
+	  /* Perform the k remaining regular test-subtract, shift-dividend
+	     iterations.  */
+	  i = k;
+	  do
+	    {
+	      if (r >= y)
+		r = ((r - y) << 1) + 1;
+	      else
+		r =  (r << 1);
+	      i = i - 1;
+	    } while (i != 0);
+
+	  /* The low k bits of r now hold the later quotient bits and the
+	     bits above them the remainder; fold the former into q.  */
+	  q = q + r;
+	  r = r >> k;
+	  q = q - (r << k);
+	}
+    }
+
+  if (rp)
+    *rp = r;
+  return q;
+}
+#else
 
 #if (defined (L_udivdi3) || defined (L_divdi3) || \
      defined (L_umoddi3) || defined (L_moddi3))
@@ -1152,6 +1220,7 @@  __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
   return ww.ll;
 }
 #endif
+#endif
 
 #ifdef L_divdi3
 DWtype