diff mbox

[5/5] Inline all support functions for sin and cos

Message ID 1471976565-3576-6-git-send-email-siddhesh@sourceware.org
State New
Headers show

Commit Message

Siddhesh Poyarekar Aug. 23, 2016, 6:22 p.m. UTC
The support functions for sin and cos have a lot of identical
functionality, so inlining them gives a pretty decent jump in
performance: ~19% in the sincos function microbenchmark.  On SPEC2006
this translates to about 2.1% in the tonto test.

	* sysdeps/ieee754/dbl-64/s_sin.c (do_cos): Mark as inline.
	(do_cos_slow): Likewise.
	(do_sin): Likewise.
	(do_sin_slow): Likewise.
	(slow): Likewise.
	(slow1): Likewise.
	(slow2): Likewise.
	(sloww): Likewise.
	(sloww1): Likewise.
	(sloww2): Likewise.
	(bsloww): Likewise.
	(bsloww1): Likewise.
	(bsloww2): Likewise.
	(cslow2): Likewise.
---
 sysdeps/ieee754/dbl-64/s_sin.c | 52 +++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 24 deletions(-)

Comments

Siddhesh Poyarekar Aug. 30, 2016, 3:09 a.m. UTC | #1
Ping!

On Tuesday 23 August 2016 11:52 PM, Siddhesh Poyarekar wrote:
> The support functions for sin and cos have a lot of identical
> functionality, so inlining them gives a pretty decent jump in
> functionality: ~19% in the sincos function.  On SPEC2006 this
> translates to about 2.1% in the tonto test.
> 
> 	* sysdeps/ieee754/dbl-64/s_sin.c (do_cos): Mark as inline.
> 	(do_cos_slow): Likewise.
> 	(do_sin): Likewise.
> 	(do_sin_slow): Likewise.
> 	(slow): Likewise.
> 	(slow1): Likewise.
> 	(slow2): Likewise.
> 	(sloww): Likewise.
> 	(sloww1): Likewise.
> 	(sloww2): Likewise.
> 	(bsloww): Likewise.
> 	(bsloww1): Likewise.
> 	(bsloww2): Likewise.
> 	(cslow2): Likewise.
> ---
>  sysdeps/ieee754/dbl-64/s_sin.c | 52 +++++++++++++++++++++++-------------------
>  1 file changed, 28 insertions(+), 24 deletions(-)
> 
> diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
> index 82f9345..c20ef4d 100644
> --- a/sysdeps/ieee754/dbl-64/s_sin.c
> +++ b/sysdeps/ieee754/dbl-64/s_sin.c
> @@ -145,7 +145,8 @@ static double cslow2 (double x);
>     of the number by combining the sin and cos of X (as computed by a variation
>     of the Taylor series) with the values looked up from the sin/cos table to
>     get the result in RES and a correction value in COR.  */
> -static double
> +static inline double
> +__always_inline
>  do_cos (double x, double dx, double *corp)
>  {
>    mynumber u;
> @@ -170,7 +171,8 @@ do_cos (double x, double dx, double *corp)
>  
>  /* A more precise variant of DO_COS.  EPS is the adjustment to the correction
>     COR.  */
> -static double
> +static inline double
> +__always_inline
>  do_cos_slow (double x, double dx, double eps, double *corp)
>  {
>    mynumber u;
> @@ -205,7 +207,8 @@ do_cos_slow (double x, double dx, double eps, double *corp)
>     the number by combining the sin and cos of X (as computed by a variation of
>     the Taylor series) with the values looked up from the sin/cos table to get
>     the result in RES and a correction value in COR.  */
> -static double
> +static inline double
> +__always_inline
>  do_sin (double x, double dx, double *corp)
>  {
>    mynumber u;
> @@ -229,7 +232,8 @@ do_sin (double x, double dx, double *corp)
>  
>  /* A more precise variant of DO_SIN.  EPS is the adjustment to the correction
>     COR.  */
> -static double
> +static inline double
> +__always_inline
>  do_sin_slow (double x, double dx, double eps, double *corp)
>  {
>    mynumber u;
> @@ -615,8 +619,8 @@ __cos (double x)
>  /* precision  and if still doesn't accurate enough by mpsin   or dubsin */
>  /************************************************************************/
>  
> -static double
> -SECTION
> +static inline double
> +__always_inline
>  slow (double x)
>  {
>    double res, cor, w[2];
> @@ -636,8 +640,8 @@ slow (double x)
>  /* and if result still doesn't accurate enough by mpsin   or dubsin            */
>  /*******************************************************************************/
>  
> -static double
> -SECTION
> +static inline double
> +__always_inline
>  slow1 (double x)
>  {
>    double w[2], cor, res;
> @@ -657,8 +661,8 @@ slow1 (double x)
>  /*  Routine compute sin(x) for   0.855469  <|x|<2.426265  by  __sincostab.tbl  */
>  /* and if result still doesn't accurate enough by mpsin   or dubsin       */
>  /**************************************************************************/
> -static double
> -SECTION
> +static inline double
> +__always_inline
>  slow2 (double x)
>  {
>    double w[2], y, y1, y2, cor, res;
> @@ -686,8 +690,8 @@ slow2 (double x)
>  /* result.And if result not accurate enough routine calls mpsin1 or dubsin */
>  /***************************************************************************/
>  
> -static double
> -SECTION
> +static inline double
> +__always_inline
>  sloww (double x, double dx, double orig, int k)
>  {
>    double y, t, res, cor, w[2], a, da, xn;
> @@ -747,8 +751,8 @@ sloww (double x, double dx, double orig, int k)
>  /* accurate enough routine calls  mpsin1   or dubsin                       */
>  /***************************************************************************/
>  
> -static double
> -SECTION
> +static inline double
> +__always_inline
>  sloww1 (double x, double dx, double orig, int k)
>  {
>    double w[2], cor, res;
> @@ -777,8 +781,8 @@ sloww1 (double x, double dx, double orig, int k)
>  /* accurate enough routine calls  mpsin1   or dubsin                       */
>  /***************************************************************************/
>  
> -static double
> -SECTION
> +static inline double
> +__always_inline
>  sloww2 (double x, double dx, double orig, int n)
>  {
>    double w[2], cor, res;
> @@ -808,8 +812,8 @@ sloww2 (double x, double dx, double orig, int n)
>  /* result.And if result not accurate enough routine calls other routines    */
>  /***************************************************************************/
>  
> -static double
> -SECTION
> +static inline double
> +__always_inline
>  bsloww (double x, double dx, double orig, int n)
>  {
>    double res, cor, w[2], a, da;
> @@ -837,8 +841,8 @@ bsloww (double x, double dx, double orig, int n)
>  /* And if result not  accurate enough routine calls  other routines         */
>  /***************************************************************************/
>  
> -static double
> -SECTION
> +static inline double
> +__always_inline
>  bsloww1 (double x, double dx, double orig, int n)
>  {
>    double w[2], cor, res;
> @@ -865,8 +869,8 @@ bsloww1 (double x, double dx, double orig, int n)
>  /* And if result not accurate enough routine calls  other routines          */
>  /***************************************************************************/
>  
> -static double
> -SECTION
> +static inline double
> +__always_inline
>  bsloww2 (double x, double dx, double orig, int n)
>  {
>    double w[2], cor, res;
> @@ -891,8 +895,8 @@ bsloww2 (double x, double dx, double orig, int n)
>  /* precision  and if still doesn't accurate enough by mpcos   or docos  */
>  /************************************************************************/
>  
> -static double
> -SECTION
> +static inline double
> +__always_inline
>  cslow2 (double x)
>  {
>    double w[2], cor, res;
>
Andreas Schwab Aug. 30, 2016, 7:52 a.m. UTC | #2
On Aug 23 2016, Siddhesh Poyarekar <siddhesh@sourceware.org> wrote:

> The support functions for sin and cos have a lot of identical
> functionality, so inlining them gives a pretty decent jump in
> functionality: ~19% in the sincos function.  On SPEC2006 this

What is the metric of functionality?

> translates to about 2.1% in the tonto test.

What does "tonto test" mean?

Andreas.
Ramana Radhakrishnan Aug. 30, 2016, 7:59 a.m. UTC | #3
On Tue, Aug 30, 2016 at 8:52 AM, Andreas Schwab <schwab@suse.de> wrote:
> On Aug 23 2016, Siddhesh Poyarekar <siddhesh@sourceware.org> wrote:
>
>> The support functions for sin and cos have a lot of identical
>> functionality, so inlining them gives a pretty decent jump in
>> functionality: ~19% in the sincos function.  On SPEC2006 this
>
> What is the metric of functionality?
>
>> translates to about 2.1% in the tonto test.
>
> What does "tonto test" mean?

https://www.spec.org/cpu2006/Docs/465.tonto.html



>
> Andreas.
>
> --
> Andreas Schwab, SUSE Labs, schwab@suse.de
> GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
> "And now for something completely different."
Siddhesh Poyarekar Aug. 30, 2016, 8:48 a.m. UTC | #4
On Tuesday 30 August 2016 01:22 PM, Andreas Schwab wrote:
>> The support functions for sin and cos have a lot of identical
>> functionality, so inlining them gives a pretty decent jump in
>> functionality: ~19% in the sincos function.  On SPEC2006 this
> What is the metric of functionality?

Sorry, that was a typo; it should read "a pretty decent jump in
performance", as measured by the sincos function microbenchmark in
benchtests.

>> translates to about 2.1% in the tonto test.
> What does "tonto test" mean?

The tonto test (465.tonto) is part of the SPEC CPU2006 benchmark suite,
and it spends a little under half of its execution time in sincos and
the functions it calls.

Siddhesh
Joseph Myers Sept. 1, 2016, 4:36 p.m. UTC | #5
On Tue, 23 Aug 2016, Siddhesh Poyarekar wrote:

> The support functions for sin and cos have a lot of identical
> functionality, so inlining them gives a pretty decent jump in
> functionality: ~19% in the sincos function.  On SPEC2006 this
> translates to about 2.1% in the tonto test.
> 
> 	* sysdeps/ieee754/dbl-64/s_sin.c (do_cos): Mark as inline.
> 	(do_cos_slow): Likewise.
> 	(do_sin): Likewise.
> 	(do_sin_slow): Likewise.
> 	(slow): Likewise.
> 	(slow1): Likewise.
> 	(slow2): Likewise.
> 	(sloww): Likewise.
> 	(sloww1): Likewise.
> 	(sloww2): Likewise.
> 	(bsloww): Likewise.
> 	(bsloww1): Likewise.
> 	(bsloww2): Likewise.
> 	(cslow2): Likewise.

OK.
diff mbox

Patch

diff --git a/sysdeps/ieee754/dbl-64/s_sin.c b/sysdeps/ieee754/dbl-64/s_sin.c
index 82f9345..c20ef4d 100644
--- a/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/sysdeps/ieee754/dbl-64/s_sin.c
@@ -145,7 +145,8 @@  static double cslow2 (double x);
    of the number by combining the sin and cos of X (as computed by a variation
    of the Taylor series) with the values looked up from the sin/cos table to
    get the result in RES and a correction value in COR.  */
-static double
+static inline double
+__always_inline
 do_cos (double x, double dx, double *corp)
 {
   mynumber u;
@@ -170,7 +171,8 @@  do_cos (double x, double dx, double *corp)
 
 /* A more precise variant of DO_COS.  EPS is the adjustment to the correction
    COR.  */
-static double
+static inline double
+__always_inline
 do_cos_slow (double x, double dx, double eps, double *corp)
 {
   mynumber u;
@@ -205,7 +207,8 @@  do_cos_slow (double x, double dx, double eps, double *corp)
    the number by combining the sin and cos of X (as computed by a variation of
    the Taylor series) with the values looked up from the sin/cos table to get
    the result in RES and a correction value in COR.  */
-static double
+static inline double
+__always_inline
 do_sin (double x, double dx, double *corp)
 {
   mynumber u;
@@ -229,7 +232,8 @@  do_sin (double x, double dx, double *corp)
 
 /* A more precise variant of DO_SIN.  EPS is the adjustment to the correction
    COR.  */
-static double
+static inline double
+__always_inline
 do_sin_slow (double x, double dx, double eps, double *corp)
 {
   mynumber u;
@@ -615,8 +619,8 @@  __cos (double x)
 /* precision  and if still doesn't accurate enough by mpsin   or dubsin */
 /************************************************************************/
 
-static double
-SECTION
+static inline double
+__always_inline
 slow (double x)
 {
   double res, cor, w[2];
@@ -636,8 +640,8 @@  slow (double x)
 /* and if result still doesn't accurate enough by mpsin   or dubsin            */
 /*******************************************************************************/
 
-static double
-SECTION
+static inline double
+__always_inline
 slow1 (double x)
 {
   double w[2], cor, res;
@@ -657,8 +661,8 @@  slow1 (double x)
 /*  Routine compute sin(x) for   0.855469  <|x|<2.426265  by  __sincostab.tbl  */
 /* and if result still doesn't accurate enough by mpsin   or dubsin       */
 /**************************************************************************/
-static double
-SECTION
+static inline double
+__always_inline
 slow2 (double x)
 {
   double w[2], y, y1, y2, cor, res;
@@ -686,8 +690,8 @@  slow2 (double x)
 /* result.And if result not accurate enough routine calls mpsin1 or dubsin */
 /***************************************************************************/
 
-static double
-SECTION
+static inline double
+__always_inline
 sloww (double x, double dx, double orig, int k)
 {
   double y, t, res, cor, w[2], a, da, xn;
@@ -747,8 +751,8 @@  sloww (double x, double dx, double orig, int k)
 /* accurate enough routine calls  mpsin1   or dubsin                       */
 /***************************************************************************/
 
-static double
-SECTION
+static inline double
+__always_inline
 sloww1 (double x, double dx, double orig, int k)
 {
   double w[2], cor, res;
@@ -777,8 +781,8 @@  sloww1 (double x, double dx, double orig, int k)
 /* accurate enough routine calls  mpsin1   or dubsin                       */
 /***************************************************************************/
 
-static double
-SECTION
+static inline double
+__always_inline
 sloww2 (double x, double dx, double orig, int n)
 {
   double w[2], cor, res;
@@ -808,8 +812,8 @@  sloww2 (double x, double dx, double orig, int n)
 /* result.And if result not accurate enough routine calls other routines    */
 /***************************************************************************/
 
-static double
-SECTION
+static inline double
+__always_inline
 bsloww (double x, double dx, double orig, int n)
 {
   double res, cor, w[2], a, da;
@@ -837,8 +841,8 @@  bsloww (double x, double dx, double orig, int n)
 /* And if result not  accurate enough routine calls  other routines         */
 /***************************************************************************/
 
-static double
-SECTION
+static inline double
+__always_inline
 bsloww1 (double x, double dx, double orig, int n)
 {
   double w[2], cor, res;
@@ -865,8 +869,8 @@  bsloww1 (double x, double dx, double orig, int n)
 /* And if result not accurate enough routine calls  other routines          */
 /***************************************************************************/
 
-static double
-SECTION
+static inline double
+__always_inline
 bsloww2 (double x, double dx, double orig, int n)
 {
   double w[2], cor, res;
@@ -891,8 +895,8 @@  bsloww2 (double x, double dx, double orig, int n)
 /* precision  and if still doesn't accurate enough by mpcos   or docos  */
 /************************************************************************/
 
-static double
-SECTION
+static inline double
+__always_inline
 cslow2 (double x)
 {
   double w[2], cor, res;