Optimize sin(atan(x)), take 2

Message ID	CAEFO=4CzZzMDfFRa7_G4LUnAD0+NRqe9KhYQ9h7GPjkk8Uqcaw@mail.gmail.com
State	New
Headers	show Return-Path: <gcc-patches-return-485043-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:from:date:message-id:subject:to:content-type; q= dns; s=default; b=mnLAi0WrczGAWgE6TrVWDlxzm5iSH9uaEZVkMjcuHJjkFG Yl/ibmVjyGGZzOcjkIIouRPpZ8EvCwOI3fvqvFvx04NjdelX4hjFqzolJzlgTeTg NIrm7stwL9QFoDT5ev1+deeNVCNXkzZZRmfPBkvbx/8AaRouauKhww92Cz2LE= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org MIME-Version: 1.0 From: Giuliano Augusto Faulin Belinassi <giuliano.belinassi@usp.br> Date: Mon, 3 Sep 2018 16:11:49 -0300 Message-ID: <CAEFO=4CzZzMDfFRa7_G4LUnAD0+NRqe9KhYQ9h7GPjkk8Uqcaw@mail.gmail.com> Subject: [PATCH] Optimize sin(atan(x)), take 2 To: GCC Patches <gcc-patches@gcc.gnu.org> Content-Type: multipart/mixed; boundary="00000000000035f04c0574fc50bc"
Series	Optimize sin(atan(x)), take 2 \| expand Optimize sin(atan(x)), take 2

Index: gcc/match.pd =================================================================== --- gcc/match.pd (revisão 264058) +++ gcc/match.pd (cópia de trabalho) @@ -4169,6 +4169,39 @@ (tans (atans @0)) @0))) + /* Simplify sin(atan(x)) -> x / sqrt(x*x + 1). */ + (for sins (SIN) + atans (ATAN) + sqrts (SQRT) + (simplify + (sins (atans:s @0)) + (if (SCALAR_FLOAT_TYPE_P (type)) + (switch + (if (types_match (type, float_type_node)) + (cond (le (abs @0) { build_sinatan_cst (float_type_node); }) + (rdiv @0 (sqrts (plus (mult @0 @0) + {build_one_cst (float_type_node);}))) + (BUILT_IN_COPYSIGNF { build_one_cst (float_type_node); } @0))) + (if (types_match (type, double_type_node)) + (cond (le (abs @0) { build_sinatan_cst (double_type_node); }) + (rdiv @0 (sqrts (plus (mult @0 @0) + {build_one_cst (double_type_node);}))) + (BUILT_IN_COPYSIGN { build_one_cst (double_type_node); } @0))) + (if (types_match (type, long_double_type_node)) + (cond (le (abs @0) { build_sinatan_cst (long_double_type_node); }) + (rdiv @0 (sqrts (plus (mult @0 @0) + {build_one_cst (long_double_type_node);}))) + (BUILT_IN_COPYSIGNL { build_one_cst (long_double_type_node); } @0))))))) + +/* Simplify cos(atan(x)) -> 1 / sqrt(x*x + 1). */ + (for coss (COS) + atans (ATAN) + sqrts (SQRT) + (simplify + (coss (atans:s @0)) + (rdiv {build_one_cst (type);} + (sqrts (plus (mult @0 @0) {build_one_cst (type);}))))) + /* cabs(x+0i) or cabs(0+xi) -> abs(x). */ (simplify (CABS (complex:C @0 real_zerop@1)) Index: gcc/real.c =================================================================== --- gcc/real.c (revisão 264058) +++ gcc/real.c (cópia de trabalho) @@ -5279,3 +5279,46 @@ { return HONOR_SIGN_DEPENDENT_ROUNDING (GET_MODE (x)); } + +/* Build real constant used by sin(atan(x)) optimization. + The logic here is as follows: It can be mathematically + shown that sin(atan(x)) = x / sqrt(1 + x*x), but notice + that the second formula has an x*x, which can overflow + if x is big enough. However, x / sqrt(1 + x*x) converges + to 1 for large x. What must be the value of x such that + when computing x / sqrt (1 + x*x) = 1? + + Therefore, we must then solve x / sqrt(1 + x*x) > eps + for x, where eps is the largest number smaller than 1 + representable by the target. Hence, solving for eps + yields that x > eps / sqrt(1 - eps*eps). This eps can + be easily calculated by calling nextafter. Likewise for + the negative x. */ + +void +build_sinatan_real (REAL_VALUE_TYPE * r, tree type) +{ + REAL_VALUE_TYPE eps; + mpfr_t mpfr_eps, mpfr_const1, c, divisor; + const struct real_format * fmt = NULL; + + fmt = type ? REAL_MODE_FORMAT (TYPE_MODE (type)) : NULL; + + mpfr_inits (divisor, mpfr_eps, mpfr_const1, c, NULL); + mpfr_from_real (mpfr_const1, &dconst1, GMP_RNDN); + + real_nextafter (&eps, fmt, &dconst1, &dconst0); + + mpfr_from_real (mpfr_eps, &eps, GMP_RNDZ); + mpfr_mul (divisor, mpfr_eps, mpfr_eps, GMP_RNDU); + mpfr_sub (divisor, mpfr_const1, divisor, GMP_RNDZ); + mpfr_sqrt (divisor, divisor, GMP_RNDZ); + mpfr_div (c, mpfr_eps, divisor, GMP_RNDU); + + real_from_mpfr (r, c, fmt, GMP_RNDU); + + /* For safety reasons. */ + times_pten(r, 1); + + mpfr_clears (divisor, mpfr_eps, mpfr_const1, c, NULL); +} Index: gcc/real.h =================================================================== --- gcc/real.h (revisão 264058) +++ gcc/real.h (cópia de trabalho) @@ -523,4 +523,8 @@ const wide_int_ref &, signop); #endif +/* Build a constant c such that, for every x > c, x / sqrt(1 + x*x) = 1 + in floating point. */ +extern void build_sinatan_real (REAL_VALUE_TYPE *, tree); + #endif /* ! GCC_REAL_H */ Index: gcc/testsuite/gcc.dg/sinatan-1.c =================================================================== --- gcc/testsuite/gcc.dg/sinatan-1.c (nonexistent) +++ gcc/testsuite/gcc.dg/sinatan-1.c (cópia de trabalho) @@ -0,0 +1,136 @@ +/* { dg-do run } */ + +extern float sinf (float); +extern float cosf (float); +extern float atanf (float); +extern float sqrtf (float); +extern float nextafterf (float, float); +extern double sin (double); +extern double cos (double); +extern double atan (double); +extern double sqrt (double); +extern double nextafter (double, double); +extern long double sinl (long double); +extern long double cosl (long double); +extern long double atanl (long double); +extern long double sqrtl (long double); +extern long double nextafterl (long double, long double); + +extern void abort (); + +double __attribute__ ((noinline, optimize("Ofast"))) +sinatan (double x) +{ + return sin (atan (x)); +} + +double __attribute__ ((noinline, optimize("Ofast"))) +cosatan (double x) +{ + return cos (atan (x)); +} + +double __attribute__ ((noinline)) +sinatan_ (double x) +{ + return sin (atan (x)); +} + +float __attribute__ ((noinline, optimize("Ofast"))) +sinatanf(float x) +{ + return sinf (atanf (x)); +} + +float __attribute__ ((noinline, optimize("Ofast"))) +cosatanf(float x) +{ + return cosf (atanf (x)); +} + +float __attribute__ ((noinline)) +sinatanf_ (float x) +{ + return sinf (atanf (x)); +} + +long double __attribute__ ((noinline, optimize("Ofast"))) +sinatanl (long double x) +{ + return sinl (atanl (x)); +} + +long double __attribute__ ((noinline, optimize("Ofast"))) +cosatanl (long double x) +{ + return cosl (atanl (x)); +} + +long double __attribute__ ((noinline)) +sinatanl_ (long double x) +{ + return sinl (atanl (x)); +} + +/* Same logic as implemented in build_sinatan_cst. */ +float +sinatanf_inv (float x) +{ + return (x / sqrtf (1 - x*x))*10; +} + +double +sinatan_inv (double x) +{ + return (x / sqrt (1 - x*x))*10; +} + +long double +sinatanl_inv (long double x) +{ + return (x / sqrtl (1 - x*x))*10; +} + +int +main() +{ + float fc, feps; + double c, eps; + long double lc, leps; + + /* Force move from FPU to memory, otherwise comparison may + fail due to possible more accurate registers (see 387) */ + volatile float fy1, fy2; + volatile double y1, y2; + volatile long double ly1, ly2; + + feps = nextafterf (1.f, 0.f); + eps = nextafter (1. , 0. ); + leps = nextafterl (1.L, 0.L); + + fc = sinatanf_inv (feps); + c = sinatan_inv (eps); + lc = sinatanl_inv (leps); + + fy1 = sinatanf (fc); + fy2 = sinatanf_ (fc); + y1 = sinatan (c); + y2 = sinatan_(c); + ly1 = sinatanl (lc); + ly2 = sinatanl_ (lc); + + if (fy1 != fy2 || y1 != y2 || ly1 != ly2) + abort (); + + fy1 = sinatanf (-fc); + fy2 = sinatanf_ (-fc); + y1 = sinatan (-c); + y2 = sinatan_(-c); + ly1 = sinatanl (-lc); + ly2 = sinatanl_ (-lc); + + if (fy1 != fy2 || y1 != y2 || ly1 != ly2) + abort (); + + return 0; +} Index: gcc/testsuite/gcc.dg/sinatan-2.c =================================================================== --- gcc/testsuite/gcc.dg/sinatan-2.c (nonexistent) +++ gcc/testsuite/gcc.dg/sinatan-2.c (cópia de trabalho) @@ -0,0 +1,59 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -fdump-tree-optimized" } */ + +extern float sinf (float); +extern float cosf (float); +extern float atanf (float); +extern double sin (double); +extern double cos (double); +extern double atan (double); +extern long double sinl (long double); +extern long double cosl (long double); +extern long double atanl (long double); + +double __attribute__ ((noinline)) +sinatan_ (double x) +{ + return sin (atan (x)); +} + +double __attribute__ ((noinline)) +cosatan_ (double x) +{ + return cos (atan (x)); +} + +float __attribute__ ((noinline)) +sinatanf_(float x) +{ + return sinf (atanf (x)); +} + +float __attribute__ ((noinline)) +cosatanf_(float x) +{ + return cosf (atanf (x)); +} + +long double __attribute__ ((noinline)) +sinatanl_ (long double x) +{ + return sinl (atanl (x)); +} + +long double __attribute__ ((noinline)) +cosatanl_ (long double x) +{ + return cosl (atanl (x)); +} + +/* There must be no calls to sin, cos, or atan */ +/* {dg-final { scan-tree-dump-not "sin " "optimized" } } */ +/* {dg-final { scan-tree-dump-not "cos " "optimized" } } */ +/* {dg-final { scan-tree-dump-not "atan " "optimized" }} */ +/* {dg-final { scan-tree-dump-not "sinf " "optimized" } } */ +/* {dg-final { scan-tree-dump-not "cosf " "optimized" } } */ +/* {dg-final { scan-tree-dump-not "atanf " "optimized" }} */ +/* {dg-final { scan-tree-dump-not "sinl " "optimized" } } */ +/* {dg-final { scan-tree-dump-not "cosl " "optimized" } } */ +/* {dg-final { scan-tree-dump-not "atanl " "optimized" }} */ Index: gcc/testsuite/gcc.dg/sinatan-3.c =================================================================== --- gcc/testsuite/gcc.dg/sinatan-3.c (nonexistent) +++ gcc/testsuite/gcc.dg/sinatan-3.c (cópia de trabalho) @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -fdump-tree-optimized" } */ + +extern float sinf (float); +extern float cosf (float); +extern float atanf (float); +extern double sin (double); +extern double cos (double); +extern double atan (double); +extern long double sinl (long double); +extern long double cosl (long double); +extern long double atanl (long double); + +float __attribute__ ((noinline)) +cosatanf_(float x) +{ + float atg = atanf(x); + return cosf(atg) + atg; +} + +double __attribute__ ((noinline)) +cosatan_(double x) +{ + double atg = atan(x); + return cos(atg) + atg; +} + +long double __attribute__ ((noinline)) +cosatanl_(long double x) +{ + long double atg = atanl(x); + return cosl(atg) + atg; +} + +float __attribute__ ((noinline)) +sinatanf_(float x) +{ + float atg = atanf(x); + return sinf(atg) + atg; +} + +double __attribute__ ((noinline)) +sinatan_(double x) +{ + double atg = atan(x); + return sin(atg) + atg; +} + +long double __attribute__ ((noinline)) +sinatanl_(long double x) +{ + long double atg = atanl(x); + return sinl(atg) + atg; +} + +/* There should be calls to both sin and atan */ +/* { dg-final { scan-tree-dump "cos " "optimized" } } */ +/* { dg-final { scan-tree-dump "sin " "optimized" } } */ +/* { dg-final { scan-tree-dump "atan " "optimized" } } */ +/* { dg-final { scan-tree-dump "cosf " "optimized" } } */ +/* { dg-final { scan-tree-dump "sinf " "optimized" } } */ +/* { dg-final { scan-tree-dump "atanf " "optimized" } } */ +/* { dg-final { scan-tree-dump "cosl " "optimized" } } */ +/* { dg-final { scan-tree-dump "sinl " "optimized" } } */ +/* { dg-final { scan-tree-dump "atanl " "optimized" } } */ Index: gcc/tree.c =================================================================== --- gcc/tree.c (revisão 264058) +++ gcc/tree.c (cópia de trabalho) @@ -2359,7 +2359,16 @@ } } +tree +build_sinatan_cst (tree type) +{ + REAL_VALUE_TYPE cst; + build_sinatan_real (&cst, type); + return build_real (type, cst); +} + + /* Build a BINFO with LEN language slots. */ tree Index: gcc/tree.h =================================================================== --- gcc/tree.h (revisão 264058) +++ gcc/tree.h (cópia de trabalho) @@ -4158,6 +4158,7 @@ extern tree build_minus_one_cst (tree); extern tree build_all_ones_cst (tree); extern tree build_zero_cst (tree); +extern tree build_sinatan_cst (tree); extern tree build_string (int, const char *); extern tree build_poly_int_cst (tree, const poly_wide_int_ref &); extern tree build_tree_list (tree, tree CXX_MEM_STAT_INFO);

Optimize sin(atan(x)), take 2

Commit Message

Comments

Patch