[2/7] sin/cos slow paths: remove large range reduction

Message ID DB6PR0801MB2053FBB12209FA83A345256F83AA0@DB6PR0801MB2053.eurprd08.prod.outlook.com
State New
Headers show
Series
  • [1/7] sin/cos slow paths: avoid slow paths for small inputs
Related show

Commit Message

Wilco Dijkstra March 21, 2018, 5:51 p.m.
This patch removes the large range reduction code and defers to the huge range
reduction code.  The first level range reducer supports inputs up to 2^27,
which is way too large given that inputs for sin/cos are typically small
(< 10), and optimizing for a smaller range would give a significant speedup.

Input values above 2^27 are practically never used, so there is no reason for
supporting range reduction between 2^27 and 2^48.  Removing it significantly
simplifies code and enables further speedups.  There is about a 2.3x slowdown
in this range due to __branred being extremely slow (a better algorithm could
easily more than double performance).


ChangeLog:
2018-03-20  Wilco Dijkstra  <wdijkstr@arm.com>

	* sysdeps/ieee754/dbl-64/s_sin.c (reduce_sincos_2): Remove function.
	(do_sincos_2): Likewise.
	(__sin): Remove middle range reduction case.
	(__cos): Likewise.
	* sysdeps/ieee754/dbl-64/s_sincos.c (__sincos): Remove middle range
	reduction case.
--

Comments

Joseph Myers March 22, 2018, 10:14 p.m. | #1
On Wed, 21 Mar 2018, Wilco Dijkstra wrote:

> This patch removes the large range reduction code and defers to the huge range
> reduction code.  The first level range reducer supports inputs up to 2^27,
> which is way too large given that inputs for sin/cos are typically small
> (< 10), and optimizing for a smaller range would give a significant speedup.
> 
> Input values above 2^27 are practically never used, so there is no reason for
> supporting range reduction between 2^27 and 2^48.  Removing it significantly
> simplifies code and enables further speedups.  There is about a 2.3x slowdown
> in this range due to __branred being extremely slow (a better algorithm could
> easily more than double performance).

OK.

Patch

diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 0c16b728df127ad54039da3eec376e5f1fe4c852..c86fb9f2aa9f18418defc522830a7b8f85c1dfae 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -362,80 +362,6 @@  do_sincos_1 (double a, double da, double x, int4 n, bool shift_quadrant)
   return retval;
 }
 
-static inline int4
-__always_inline
-reduce_sincos_2 (double x, double *a, double *da)
-{
-  mynumber v;
-
-  double t = (x * hpinv + toint);
-  double xn = t - toint;
-  v.x = t;
-  double xn1 = (xn + 8.0e22) - 8.0e22;
-  double xn2 = xn - xn1;
-  double y = ((((x - xn1 * mp1) - xn1 * mp2) - xn2 * mp1) - xn2 * mp2);
-  int4 n = v.i[LOW_HALF] & 3;
-  double db = xn1 * pp3;
-  t = y - db;
-  db = (y - t) - db;
-  db = (db - xn2 * pp3) - xn * pp4;
-  double b = t + db;
-  db = (t - b) + db;
-
-  *a = b;
-  *da = db;
-
-  return n;
-}
-
-/* Compute sin (A + DA).  cos can be computed by passing SHIFT_QUADRANT as
-   true, which results in shifting the quadrant N clockwise.  */
-static double
-__always_inline
-do_sincos_2 (double a, double da, double x, int4 n, bool shift_quadrant)
-{
-  double res, retval, cor, xx;
-
-  double eps = 1.0e-24;
-
-  int4 k = (n + shift_quadrant) & 3;
-
-  switch (k)
-    {
-    case 2:
-      a = -a;
-      da = -da;
-      /* Fall through.  */
-    case 0:
-      xx = a * a;
-      if (xx < 0.01588)
-	{
-	  /* Taylor series.  */
-	  res = TAYLOR_SIN (xx, a, da, cor);
-	  cor = 1.02 * cor + __copysign (eps, cor);
-	  retval = (res == res + cor) ? res : bsloww (a, da, x, n);
-	}
-      else
-	{
-	  res = do_sin (a, da, &cor);
-	  cor = 1.035 * cor + __copysign (eps, cor);
-	  retval = ((res == res + cor) ? __copysign (res, a)
-		    : bsloww1 (a, da, x, n));
-	}
-      break;
-
-    case 1:
-    case 3:
-      res = do_cos (a, da, &cor);
-      cor = 1.025 * cor + __copysign (eps, cor);
-      retval = ((res == res + cor) ? ((n & 2) ? -res : res)
-		: bsloww2 (a, da, x, n));
-      break;
-    }
-
-  return retval;
-}
-
 /*******************************************************************/
 /* An ultimate sin routine. Given an IEEE double machine number x   */
 /* it computes the correctly rounded (to nearest) value of sin(x)  */
@@ -498,16 +424,7 @@  __sin (double x)
       retval = do_sincos_1 (a, da, x, n, false);
     }				/*   else  if (k <  0x419921FB )    */
 
-/*---------------------105414350 <|x|< 281474976710656 --------------------*/
-  else if (k < 0x42F00000)
-    {
-      double a, da;
-
-      int4 n = reduce_sincos_2 (x, &a, &da);
-      retval = do_sincos_2 (a, da, x, n, false);
-    }				/*   else  if (k <  0x42F00000 )   */
-
-/* -----------------281474976710656 <|x| <2^1024----------------------------*/
+/* --------------------105414350 <|x| <2^1024------------------------------*/
   else if (k < 0x7ff00000)
     retval = reduce_and_compute (x, false);
 
@@ -584,15 +501,7 @@  __cos (double x)
       retval = do_sincos_1 (a, da, x, n, true);
     }				/*   else  if (k <  0x419921FB )    */
 
-  else if (k < 0x42F00000)
-    {
-      double a, da;
-
-      int4 n = reduce_sincos_2 (x, &a, &da);
-      retval = do_sincos_2 (a, da, x, n, true);
-    }				/*   else  if (k <  0x42F00000 )    */
-
-  /* 281474976710656 <|x| <2^1024 */
+  /* 105414350 <|x| <2^1024 */
   else if (k < 0x7ff00000)
     retval = reduce_and_compute (x, true);
 
diff --git a/sysdeps/ieee754/dbl-64/s_sincos.c b/sysdeps/ieee754/dbl-64/s_sincos.c
index e1977ea7e93c32cca5369677f23e68f8f797a9f4..a9af8ce526bfe78c06cfafa65de0815ec69585c5 100644
--- a/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -86,16 +86,6 @@  __sincos (double x, double *sinx, double *cosx)
 
       return;
     }
-  if (k < 0x42F00000)
-    {
-      double a, da;
-      int4 n = reduce_sincos_2 (x, &a, &da);
-
-      *sinx = do_sincos_2 (a, da, x, n, false);
-      *cosx = do_sincos_2 (a, da, x, n, true);
-
-      return;
-    }
   if (k < 0x7ff00000)
     {
       reduce_and_compute_sincos (x, sinx, cosx);