diff mbox

Fix up pow folding (PR tree-optimization/56125)

Message ID 20130128142504.GH4385@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Jan. 28, 2013, 2:25 p.m. UTC
Hi!

gimple_expand_builtin_pow last two optimizations rely on earlier
optimizations in the same function to be performed, e.g.
folding pow (x, c) for n = 2c into sqrt(x) * powi(x, n / 2) is only
correct for c which isn't an integer (otherwise the sqrt(x) factor would
need to be skipped), but they actually do not check this.
E.g. the pow (x, n) where n is integer is optimized only if:
      && ((n >= -1 && n <= 2)
          || (flag_unsafe_math_optimizations
              && optimize_insn_for_speed_p ()
              && powi_cost (n) <= POWI_MAX_MULTS)))
and as in the testcase the function is called, it isn't optimized and
we fall through till the above mentioned optimization which blindly assumes
that c isn't an integer.

Fixed by both checking that c isn't an integer (and for the last
optimization also that 2c isn't an integer), and also not doing the
-> sqrt(x) * powi(x, n / 2) resp. 1.0 / sqrt(x) * powi(x, abs(n) / 2)
optimization for -Os or cold functions, at least
__attribute__((cold)) double
foo (double x, double n)
{
  return __builtin_pow (x, -1.5);
}
is smaller when expanded as pow call both on x86_64 and on powerpc (with
-Os -ffast-math).  Even just the c*_is_int tests alone could be enough
to fix the bug, so if you say want to enable it for -Os even with c 1.5,
but not for negative values which add another operation, it can be adjusted.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2013-01-28  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/56125
	* tree-ssa-math-opts.c (gimple_expand_builtin_pow): Don't optimize
	pow(x,c) into sqrt(x) * powi(x, n/2) or
	1.0 / (sqrt(x) * powi(x, abs(n/2))) if c is an integer or when
	optimizing for size.
	Don't optimize pow(x,c) into powi(x, n/3) * powi(cbrt(x), n%3) or
	1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)) if 2c is an
	integer.

	* gcc.dg/pr56125.c: New test.

 
	Jakub

Comments

Richard Biener Jan. 28, 2013, 2:36 p.m. UTC | #1
On Mon, 28 Jan 2013, Jakub Jelinek wrote:

> Hi!
> 
> gimple_expand_builtin_pow last two optimizations rely on earlier
> optimizations in the same function to be performed, e.g.
> folding pow (x, c) for n = 2c into sqrt(x) * powi(x, n / 2) is only
> correct for c which isn't an integer (otherwise the sqrt(x) factor would
> need to be skipped), but they actually do not check this.
> E.g. the pow (x, n) where n is integer is optimized only if:
>       && ((n >= -1 && n <= 2)
>           || (flag_unsafe_math_optimizations
>               && optimize_insn_for_speed_p ()
>               && powi_cost (n) <= POWI_MAX_MULTS)))
> and as in the testcase the function is called, it isn't optimized and
> we fall through till the above mentioned optimization which blindly assumes
> that c isn't an integer.
> 
> Fixed by both checking that c isn't an integer (and for the last
> optimization also that 2c isn't an integer), and also not doing the
> -> sqrt(x) * powi(x, n / 2) resp. 1.0 / sqrt(x) * powi(x, abs(n) / 2)
> optimization for -Os or cold functions, at least
> __attribute__((cold)) double
> foo (double x, double n)
> {
>   return __builtin_pow (x, -1.5);
> }
> is smaller when expanded as pow call both on x86_64 and on powerpc (with
> -Os -ffast-math).  Even just the c*_is_int tests alone could be enough
> to fix the bug, so if you say want to enable it for -Os even with c 1.5,
> but not for negative values which add another operation, it can be adjusted.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2013-01-28  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/56125
> 	* tree-ssa-math-opts.c (gimple_expand_builtin_pow): Don't optimize
> 	pow(x,c) into sqrt(x) * powi(x, n/2) or
> 	1.0 / (sqrt(x) * powi(x, abs(n/2))) if c is an integer or when
> 	optimizing for size.
> 	Don't optimize pow(x,c) into powi(x, n/3) * powi(cbrt(x), n%3) or
> 	1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)) if 2c is an
> 	integer.
> 
> 	* gcc.dg/pr56125.c: New test.
> 
> --- gcc/tree-ssa-math-opts.c.jj	2013-01-11 09:02:48.000000000 +0100
> +++ gcc/tree-ssa-math-opts.c	2013-01-28 10:56:40.105950483 +0100
> @@ -1110,7 +1110,7 @@ gimple_expand_builtin_pow (gimple_stmt_i
>    HOST_WIDE_INT n;
>    tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
>    enum machine_mode mode;
> -  bool hw_sqrt_exists;
> +  bool hw_sqrt_exists, c_is_int, c2_is_int;
>  
>    /* If the exponent isn't a constant, there's nothing of interest
>       to be done.  */
> @@ -1122,8 +1122,9 @@ gimple_expand_builtin_pow (gimple_stmt_i
>    c = TREE_REAL_CST (arg1);
>    n = real_to_integer (&c);
>    real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
> +  c_is_int = real_identical (&c, &cint);
>  
> -  if (real_identical (&c, &cint)
> +  if (c_is_int
>        && ((n >= -1 && n <= 2)
>  	  || (flag_unsafe_math_optimizations
>  	      && optimize_insn_for_speed_p ()
> @@ -1221,7 +1222,8 @@ gimple_expand_builtin_pow (gimple_stmt_i
>        return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
>      }
>  
> -  /* Optimize pow(x,c), where n = 2c for some nonzero integer n, into
> +  /* Optimize pow(x,c), where n = 2c for some nonzero integer n
> +     and c not an integer, into
>  
>         sqrt(x) * powi(x, n/2),                n > 0;
>         1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.
> @@ -1230,10 +1232,13 @@ gimple_expand_builtin_pow (gimple_stmt_i
>    real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
>    n = real_to_integer (&c2);
>    real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
> +  c2_is_int = real_identical (&c2, &cint);
>  
>    if (flag_unsafe_math_optimizations
>        && sqrtfn
> -      && real_identical (&c2, &cint))
> +      && c2_is_int
> +      && !c_is_int
> +      && optimize_function_for_speed_p (cfun))
>      {
>        tree powi_x_ndiv2 = NULL_TREE;
>  
> @@ -1286,6 +1291,7 @@ gimple_expand_builtin_pow (gimple_stmt_i
>        && cbrtfn
>        && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
>        && real_identical (&c2, &c)
> +      && !c2_is_int
>        && optimize_function_for_speed_p (cfun)
>        && powi_cost (n / 3) <= POWI_MAX_MULTS)
>      {
> --- gcc/testsuite/gcc.dg/pr56125.c.jj	2013-01-28 11:00:04.359814742 +0100
> +++ gcc/testsuite/gcc.dg/pr56125.c	2013-01-28 11:00:55.048532118 +0100
> @@ -0,0 +1,21 @@
> +/* PR tree-optimization/56125 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -ffast-math" } */
> +
> +extern void abort (void);
> +extern double fabs (double);
> +
> +__attribute__((cold)) double
> +foo (double x, double n)
> +{
> +  double u = x / (n * n);
> +  return u;
> +}
> +
> +int
> +main ()
> +{
> +  if (fabs (foo (29, 2) - 7.25) > 0.001)
> +    abort ();
> +  return 0;
> +}
>  
> 	Jakub
> 
>
Bill Schmidt Jan. 28, 2013, 2:49 p.m. UTC | #2
LGTM!  Thanks for fixing this.

Bill

On Mon, 2013-01-28 at 15:25 +0100, Jakub Jelinek wrote:
> Hi!
> 
> gimple_expand_builtin_pow last two optimizations rely on earlier
> optimizations in the same function to be performed, e.g.
> folding pow (x, c) for n = 2c into sqrt(x) * powi(x, n / 2) is only
> correct for c which isn't an integer (otherwise the sqrt(x) factor would
> need to be skipped), but they actually do not check this.
> E.g. the pow (x, n) where n is integer is optimized only if:
>       && ((n >= -1 && n <= 2)
>           || (flag_unsafe_math_optimizations
>               && optimize_insn_for_speed_p ()
>               && powi_cost (n) <= POWI_MAX_MULTS)))
> and as in the testcase the function is called, it isn't optimized and
> we fall through till the above mentioned optimization which blindly assumes
> that c isn't an integer.
> 
> Fixed by both checking that c isn't an integer (and for the last
> optimization also that 2c isn't an integer), and also not doing the
> -> sqrt(x) * powi(x, n / 2) resp. 1.0 / sqrt(x) * powi(x, abs(n) / 2)
> optimization for -Os or cold functions, at least
> __attribute__((cold)) double
> foo (double x, double n)
> {
>   return __builtin_pow (x, -1.5);
> }
> is smaller when expanded as pow call both on x86_64 and on powerpc (with
> -Os -ffast-math).  Even just the c*_is_int tests alone could be enough
> to fix the bug, so if you say want to enable it for -Os even with c 1.5,
> but not for negative values which add another operation, it can be adjusted.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2013-01-28  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/56125
> 	* tree-ssa-math-opts.c (gimple_expand_builtin_pow): Don't optimize
> 	pow(x,c) into sqrt(x) * powi(x, n/2) or
> 	1.0 / (sqrt(x) * powi(x, abs(n/2))) if c is an integer or when
> 	optimizing for size.
> 	Don't optimize pow(x,c) into powi(x, n/3) * powi(cbrt(x), n%3) or
> 	1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)) if 2c is an
> 	integer.
> 
> 	* gcc.dg/pr56125.c: New test.
> 
> --- gcc/tree-ssa-math-opts.c.jj	2013-01-11 09:02:48.000000000 +0100
> +++ gcc/tree-ssa-math-opts.c	2013-01-28 10:56:40.105950483 +0100
> @@ -1110,7 +1110,7 @@ gimple_expand_builtin_pow (gimple_stmt_i
>    HOST_WIDE_INT n;
>    tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
>    enum machine_mode mode;
> -  bool hw_sqrt_exists;
> +  bool hw_sqrt_exists, c_is_int, c2_is_int;
> 
>    /* If the exponent isn't a constant, there's nothing of interest
>       to be done.  */
> @@ -1122,8 +1122,9 @@ gimple_expand_builtin_pow (gimple_stmt_i
>    c = TREE_REAL_CST (arg1);
>    n = real_to_integer (&c);
>    real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
> +  c_is_int = real_identical (&c, &cint);
> 
> -  if (real_identical (&c, &cint)
> +  if (c_is_int
>        && ((n >= -1 && n <= 2)
>  	  || (flag_unsafe_math_optimizations
>  	      && optimize_insn_for_speed_p ()
> @@ -1221,7 +1222,8 @@ gimple_expand_builtin_pow (gimple_stmt_i
>        return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
>      }
> 
> -  /* Optimize pow(x,c), where n = 2c for some nonzero integer n, into
> +  /* Optimize pow(x,c), where n = 2c for some nonzero integer n
> +     and c not an integer, into
> 
>         sqrt(x) * powi(x, n/2),                n > 0;
>         1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.
> @@ -1230,10 +1232,13 @@ gimple_expand_builtin_pow (gimple_stmt_i
>    real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
>    n = real_to_integer (&c2);
>    real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
> +  c2_is_int = real_identical (&c2, &cint);
> 
>    if (flag_unsafe_math_optimizations
>        && sqrtfn
> -      && real_identical (&c2, &cint))
> +      && c2_is_int
> +      && !c_is_int
> +      && optimize_function_for_speed_p (cfun))
>      {
>        tree powi_x_ndiv2 = NULL_TREE;
> 
> @@ -1286,6 +1291,7 @@ gimple_expand_builtin_pow (gimple_stmt_i
>        && cbrtfn
>        && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
>        && real_identical (&c2, &c)
> +      && !c2_is_int
>        && optimize_function_for_speed_p (cfun)
>        && powi_cost (n / 3) <= POWI_MAX_MULTS)
>      {
> --- gcc/testsuite/gcc.dg/pr56125.c.jj	2013-01-28 11:00:04.359814742 +0100
> +++ gcc/testsuite/gcc.dg/pr56125.c	2013-01-28 11:00:55.048532118 +0100
> @@ -0,0 +1,21 @@
> +/* PR tree-optimization/56125 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -ffast-math" } */
> +
> +extern void abort (void);
> +extern double fabs (double);
> +
> +__attribute__((cold)) double
> +foo (double x, double n)
> +{
> +  double u = x / (n * n);
> +  return u;
> +}
> +
> +int
> +main ()
> +{
> +  if (fabs (foo (29, 2) - 7.25) > 0.001)
> +    abort ();
> +  return 0;
> +}
> 
> 	Jakub
>
Marc Glisse Jan. 28, 2013, 3:41 p.m. UTC | #3
On Mon, 28 Jan 2013, Jakub Jelinek wrote:

> 2013-01-28  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR tree-optimization/56125
> 	* tree-ssa-math-opts.c (gimple_expand_builtin_pow): Don't optimize
> 	pow(x,c) into sqrt(x) * powi(x, n/2) or
> 	1.0 / (sqrt(x) * powi(x, abs(n/2))) if c is an integer or when
> 	optimizing for size.
> 	Don't optimize pow(x,c) into powi(x, n/3) * powi(cbrt(x), n%3) or
> 	1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)) if 2c is an
> 	integer.
>
> 	* gcc.dg/pr56125.c: New test.

Hello,

is there an implicit -lm in the testsuite?

The testcase now generates a library call to pow, like gcc-4.6. This is 
correct, but I am surprised this is considered better than leaving the 
original x/(n*n) unchanged... Should that be a different PR?
Jakub Jelinek Jan. 28, 2013, 3:56 p.m. UTC | #4
On Mon, Jan 28, 2013 at 04:41:31PM +0100, Marc Glisse wrote:
> On Mon, 28 Jan 2013, Jakub Jelinek wrote:
> 
> >2013-01-28  Jakub Jelinek  <jakub@redhat.com>
> >
> >	PR tree-optimization/56125
> >	* tree-ssa-math-opts.c (gimple_expand_builtin_pow): Don't optimize
> >	pow(x,c) into sqrt(x) * powi(x, n/2) or
> >	1.0 / (sqrt(x) * powi(x, abs(n/2))) if c is an integer or when
> >	optimizing for size.
> >	Don't optimize pow(x,c) into powi(x, n/3) * powi(cbrt(x), n%3) or
> >	1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)) if 2c is an
> >	integer.
> >
> >	* gcc.dg/pr56125.c: New test.
> 
> is there an implicit -lm in the testsuite?

Yes.

> The testcase now generates a library call to pow, like gcc-4.6. This
> is correct, but I am surprised this is considered better than
> leaving the original x/(n*n) unchanged... Should that be a different
> PR?

The function in question is marked as cold, therefore it should be optimized
for size.  The call to pow is certainly shorter than the sqrt,
multiplication, division etc.

	Jakub
Marc Glisse Jan. 28, 2013, 4:07 p.m. UTC | #5
On Mon, 28 Jan 2013, Jakub Jelinek wrote:

> On Mon, Jan 28, 2013 at 04:41:31PM +0100, Marc Glisse wrote:
>> On Mon, 28 Jan 2013, Jakub Jelinek wrote:
>>
>>> 2013-01-28  Jakub Jelinek  <jakub@redhat.com>
>>>
>>> 	PR tree-optimization/56125
>>> 	* tree-ssa-math-opts.c (gimple_expand_builtin_pow): Don't optimize
>>> 	pow(x,c) into sqrt(x) * powi(x, n/2) or
>>> 	1.0 / (sqrt(x) * powi(x, abs(n/2))) if c is an integer or when
>>> 	optimizing for size.
>>> 	Don't optimize pow(x,c) into powi(x, n/3) * powi(cbrt(x), n%3) or
>>> 	1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)) if 2c is an
>>> 	integer.
>>>
>>> 	* gcc.dg/pr56125.c: New test.
>
>> The testcase now generates a library call to pow, like gcc-4.6. This
>> is correct, but I am surprised this is considered better than
>> leaving the original x/(n*n) unchanged... Should that be a different
>> PR?
>
> The function in question is marked as cold, therefore it should be optimized
> for size.  The call to pow is certainly shorter than the sqrt,
> multiplication, division etc.

There is no sqrt, x/(n*n) is just one mul and one div, whereas with the 
call I see one mul, 3 movs to prepare for the call, and the call.
Jakub Jelinek Jan. 28, 2013, 4:21 p.m. UTC | #6
On Mon, Jan 28, 2013 at 05:07:10PM +0100, Marc Glisse wrote:
> There is no sqrt, x/(n*n) is just one mul and one div, whereas with
> the call I see one mul, 3 movs to prepare for the call, and the
> call.

Ah, you're talking about the checked in testcase, rather than the one I've
mentioned in the description whether the speed guard is desirable there or
not.  In the checked in testcase, the problem with code size is far earlier
than that, already during folding that
double u = x / (n * n);
is replaced by:
double u = x * __builtin_pow (n, -2.0e+0);
And this isn't something you can then size optimize in the pow folder on its
own, return pow (n, -2.0e); will be supposedly shorter than
return 1.0 / (n * n), the folding doesn't see that this is used in
multiplication which could be perhaps changed into division instead.

	Jakub
diff mbox

Patch

--- gcc/tree-ssa-math-opts.c.jj	2013-01-11 09:02:48.000000000 +0100
+++ gcc/tree-ssa-math-opts.c	2013-01-28 10:56:40.105950483 +0100
@@ -1110,7 +1110,7 @@  gimple_expand_builtin_pow (gimple_stmt_i
   HOST_WIDE_INT n;
   tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
   enum machine_mode mode;
-  bool hw_sqrt_exists;
+  bool hw_sqrt_exists, c_is_int, c2_is_int;
 
   /* If the exponent isn't a constant, there's nothing of interest
      to be done.  */
@@ -1122,8 +1122,9 @@  gimple_expand_builtin_pow (gimple_stmt_i
   c = TREE_REAL_CST (arg1);
   n = real_to_integer (&c);
   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+  c_is_int = real_identical (&c, &cint);
 
-  if (real_identical (&c, &cint)
+  if (c_is_int
       && ((n >= -1 && n <= 2)
 	  || (flag_unsafe_math_optimizations
 	      && optimize_insn_for_speed_p ()
@@ -1221,7 +1222,8 @@  gimple_expand_builtin_pow (gimple_stmt_i
       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
     }
 
-  /* Optimize pow(x,c), where n = 2c for some nonzero integer n, into
+  /* Optimize pow(x,c), where n = 2c for some nonzero integer n
+     and c not an integer, into
 
        sqrt(x) * powi(x, n/2),                n > 0;
        1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.
@@ -1230,10 +1232,13 @@  gimple_expand_builtin_pow (gimple_stmt_i
   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
   n = real_to_integer (&c2);
   real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+  c2_is_int = real_identical (&c2, &cint);
 
   if (flag_unsafe_math_optimizations
       && sqrtfn
-      && real_identical (&c2, &cint))
+      && c2_is_int
+      && !c_is_int
+      && optimize_function_for_speed_p (cfun))
     {
       tree powi_x_ndiv2 = NULL_TREE;
 
@@ -1286,6 +1291,7 @@  gimple_expand_builtin_pow (gimple_stmt_i
       && cbrtfn
       && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
       && real_identical (&c2, &c)
+      && !c2_is_int
       && optimize_function_for_speed_p (cfun)
       && powi_cost (n / 3) <= POWI_MAX_MULTS)
     {
--- gcc/testsuite/gcc.dg/pr56125.c.jj	2013-01-28 11:00:04.359814742 +0100
+++ gcc/testsuite/gcc.dg/pr56125.c	2013-01-28 11:00:55.048532118 +0100
@@ -0,0 +1,21 @@ 
+/* PR tree-optimization/56125 */
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math" } */
+
+extern void abort (void);
+extern double fabs (double);
+
+__attribute__((cold)) double
+foo (double x, double n)
+{
+  double u = x / (n * n);
+  return u;
+}
+
+int
+main ()
+{
+  if (fabs (foo (29, 2) - 7.25) > 0.001)
+    abort ();
+  return 0;
+}