Patchwork Handle fractional exponents in gimple_expand_builtin_pow (PR46728 patch 5)

login
register
mail settings
Submitter William J. Schmidt
Date May 29, 2011, 5:06 p.m.
Message ID <1306688807.6103.13.camel@gnopaine>
Download mbox | patch
Permalink /patch/97845/
State New
Headers show

Comments

William J. Schmidt - May 29, 2011, 5:06 p.m.
This patch adds support for fractional exponents C where 2C or 3C is
equivalent to an integer.  There is one FIXME in place for the issue
previously noted with respect to tree_expr_nonnegative_p, which I plan
to look at later this week.

Regtested on powerpc64-linux and examined code generation for
correctness and performance.  OK for trunk?

Thanks,
Bill



2011-05-29  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* tree-ssa-math-opts.c (build_and_insert_binop): New.
	(gimple_expand_pow_frac_exp): New.
	(gimple_expand_builtin_pow): Use build_and_insert_binop and
	gimple_expand_pow_frac_exp.
Richard Guenther - May 30, 2011, 12:09 p.m.
On Sun, May 29, 2011 at 7:06 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> This patch adds support for fractional exponents C where 2C or 3C is
> equivalent to an integer.  There is one FIXME in place for the issue
> previously noted with respect to tree_expr_nonnegative_p, which I plan
> to look at later this week.
>
> Regtested on powerpc64-linux and examined code generation for
> correctness and performance.  OK for trunk?
>
> Thanks,
> Bill
>
>
>
> 2011-05-29  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        * tree-ssa-math-opts.c (build_and_insert_binop): New.
>        (gimple_expand_pow_frac_exp): New.
>        (gimple_expand_builtin_pow): Use build_and_insert_binop and
>        gimple_expand_pow_frac_exp.
>
> Index: gcc/tree-ssa-math-opts.c
> ===================================================================
> --- gcc/tree-ssa-math-opts.c    (revision 174395)
> +++ gcc/tree-ssa-math-opts.c    (working copy)
> @@ -1054,6 +1054,161 @@ build_and_insert_call (gimple_stmt_iterator *gsi,
>   return ssa_target;
>  }
>
> +/* Build a gimple binary operation with the given CODE and arguments
> +   ARG0, ARG1, assigning the result to a new SSA name for variable
> +   TARGET.  Insert the statement prior to GSI's current position, and
> +   return the fresh SSA name.*/
> +
> +static tree
> +build_and_insert_binop (gimple_stmt_iterator *gsi, enum tree_code code,
> +                       tree arg0, tree arg1, tree target, location_t loc)

Canonical argument order would be

 gsi, loc, code, target, arg0, arg1

> +{
> +  tree result = make_ssa_name (target, NULL);
> +  gimple stmt = gimple_build_assign_with_ops (code, result, arg0, arg1);
> +  gimple_set_location (stmt, loc);
> +  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
> +  return result;
> +}
> +
> +/* Attempt to optimize pow(ARG0,C), where C is a real constant not
> +   equal to any integer.  When 2C or 3C is an integer, we can sometimes
> +   improve the code using sqrt and/or cbrt.  */
> +
> +static tree
> +gimple_expand_pow_frac_exp (gimple_stmt_iterator *gsi, location_t loc,
> +                           tree arg0, REAL_VALUE_TYPE c)
> +{
> +  REAL_VALUE_TYPE c2, cint, dconst3;
> +  HOST_WIDE_INT n;
> +  tree type = TREE_TYPE (arg0);
> +  enum machine_mode mode = TYPE_MODE (type);
> +  tree sqrtfn = mathfn_built_in (TREE_TYPE (arg0), BUILT_IN_SQRT);
> +  tree cbrtfn;
> +
> +  /* Optimize pow(x,c), where n = 2c for some nonzero integer n, into
> +
> +       sqrt(x) * powi(x, n/2),                n > 0;
> +       1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.
> +
> +     Do not calculate the powi factor when n/2 = 0.  */
> +  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
> +  n = real_to_integer (&c2);
> +  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
> +
> +  if (flag_unsafe_math_optimizations
> +      && sqrtfn
> +      && real_identical (&c2, &cint))
> +    {
> +      tree powi_x_ndiv2 = NULL_TREE;
> +      tree sqrt_arg0, result;
> +      tree target = NULL_TREE;
> +
> +      /* Attempt to fold powi(arg0, abs(n/2)) into multiplies.  If not
> +         possible or profitable, give up.  Skip the degenerate case when
> +         n is 1 or -1, where the result is always 1.  */
> +      if (abs (n) != 1)
> +       {
> +         powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0, abs(n/2));
> +         if (!powi_x_ndiv2)
> +           return NULL_TREE;
> +       }
> +
> +      /* Calculate sqrt(x).  When n is not 1 or -1, multiply it by the
> +        result of the optimal multiply sequence just calculated.  */
> +      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +      if (abs (n) == 1)
> +       result = sqrt_arg0;
> +      else
> +       result = build_and_insert_binop (gsi, MULT_EXPR, sqrt_arg0,
> +                                        powi_x_ndiv2, target, loc);
> +
> +      /* If n is negative, reciprocate the result.  */
> +      if (n < 0)
> +       result = build_and_insert_binop (gsi, RDIV_EXPR,
> +                                        build_real (type, dconst1),
> +                                        result, target, loc);
> +      return result;
> +    }
> +
> +  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
> +
> +     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
> +     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
> +
> +     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
> +     different from pow(x, 1./3.) due to rounding and behavior with
> +     negative x, we need to constrain this transformation to unsafe
> +     math and positive x or finite math.  */
> +  cbrtfn = mathfn_built_in (TREE_TYPE (arg0), BUILT_IN_CBRT);
> +
> +  real_from_integer (&dconst3, VOIDmode, 3, 0, 0);
> +  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
> +  real_round (&c2, mode, &c2);
> +  n = real_to_integer (&c2);
> +  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
> +  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
> +  real_convert (&c2, mode, &c2);
> +
> +  if (flag_unsafe_math_optimizations
> +      && cbrtfn
> +      /* FIXME: The following line was originally
> +        && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)),
> +        but since arg0 is a gimple value, the first predicate
> +        will always return false.  It needs to be replaced with a
> +        call to a similar gimple_val_nonnegative_p function to be
> +         added in gimple-fold.c.  */
> +      && !HONOR_NANS (mode)
> +      && real_identical (&c2, &c)
> +      && optimize_function_for_speed_p (cfun)
> +      && powi_cost (n / 3) <= POWI_MAX_MULTS)
> +    {
> +      tree powi_x_ndiv3 = NULL_TREE;
> +      tree cbrt_x, powi_cbrt_x, result;
> +      tree target = NULL_TREE;
> +
> +      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
> +         possible or profitable, give up.  Skip the degenerate case when
> +         abs(n) < 3, where the result is always 1.  */
> +      if (abs (n) >= 3)
> +       {
> +         powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
> +                                                    abs (n / 3));
> +         if (!powi_x_ndiv3)
> +           return NULL_TREE;
> +       }
> +
> +      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
> +         as that creates an unnecessary variable.  Instead, just produce
> +         either cbrt(x) or cbrt(x) * cbrt(x).  */
> +      cbrt_x = build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);
> +
> +      if (abs (n) % 3 == 1)
> +       powi_cbrt_x = cbrt_x;
> +      else
> +       powi_cbrt_x = build_and_insert_binop (gsi, MULT_EXPR, cbrt_x,
> +                                             cbrt_x, target, loc);
> +
> +      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
> +      if (abs (n) < 3)
> +       result = powi_cbrt_x;
> +      else
> +       result = build_and_insert_binop (gsi, MULT_EXPR, powi_x_ndiv3,
> +                                        powi_cbrt_x, target, loc);
> +
> +      /* If n is negative, reciprocate the result.  */
> +      if (n < 0)
> +       result = build_and_insert_binop (gsi, RDIV_EXPR,
> +                                        build_real (type, dconst1),
> +                                        result, target, loc);
> +
> +      return result;
> +    }
> +
> +  /* No optimizations succeeded.  */
> +  return NULL_TREE;
> +}
> +
>  /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
>    with location info LOC.  If possible, create an equivalent and
>    less expensive sequence of statements prior to GSI, and return an
> @@ -1065,11 +1220,10 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
>  {
>   REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
>   HOST_WIDE_INT n;
> -  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, ssa_target;
> +  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt;
>   tree target = NULL_TREE;
>   enum machine_mode mode;
>   bool hw_sqrt_exists;
> -  gimple mult_stmt;
>
>   /* If the exponent isn't a constant, there's nothing of interest
>      to be done.  */
> @@ -1141,13 +1295,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
>       sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
>
>       /* sqrt(x) * sqrt(sqrt(x))  */
> -      ssa_target = make_ssa_name (target, NULL);
> -      mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
> -                                               sqrt_arg0, sqrt_sqrt);
> -      gimple_set_location (mult_stmt, loc);
> -      gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
> -
> -      return ssa_target;
> +      return build_and_insert_binop (gsi, MULT_EXPR, sqrt_arg0, sqrt_sqrt,
> +                                    target, loc);
>     }
>
>   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
> @@ -1197,7 +1346,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
>       return build_and_insert_call (gsi, cbrtfn, sqrt_arg0, &target, loc);
>     }
>
> -  return NULL_TREE;
> +  /* Attempt to optimize various other fractional exponents.  */
> +  return gimple_expand_pow_frac_exp (gsi, loc, arg0, c);

Please just inline the code here instead of using another function.
It's still all expansions of pow ().

Ok with that changes.

Thanks,
Richard.

>  }
>
>  /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
>
>
>

Patch

Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc/tree-ssa-math-opts.c	(revision 174395)
+++ gcc/tree-ssa-math-opts.c	(working copy)
@@ -1054,6 +1054,161 @@  build_and_insert_call (gimple_stmt_iterator *gsi,
   return ssa_target;
 }
 
+/* Build a gimple binary operation with the given CODE and arguments
+   ARG0, ARG1, assigning the result to a new SSA name for variable
+   TARGET.  Insert the statement prior to GSI's current position, and
+   return the fresh SSA name.*/
+
+static tree
+build_and_insert_binop (gimple_stmt_iterator *gsi, enum tree_code code,
+			tree arg0, tree arg1, tree target, location_t loc)
+{
+  tree result = make_ssa_name (target, NULL);
+  gimple stmt = gimple_build_assign_with_ops (code, result, arg0, arg1);
+  gimple_set_location (stmt, loc);
+  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+  return result;
+}
+
+/* Attempt to optimize pow(ARG0,C), where C is a real constant not
+   equal to any integer.  When 2C or 3C is an integer, we can sometimes
+   improve the code using sqrt and/or cbrt.  */
+
+static tree
+gimple_expand_pow_frac_exp (gimple_stmt_iterator *gsi, location_t loc,
+			    tree arg0, REAL_VALUE_TYPE c)
+{
+  REAL_VALUE_TYPE c2, cint, dconst3;
+  HOST_WIDE_INT n;
+  tree type = TREE_TYPE (arg0);
+  enum machine_mode mode = TYPE_MODE (type);
+  tree sqrtfn = mathfn_built_in (TREE_TYPE (arg0), BUILT_IN_SQRT);
+  tree cbrtfn;
+
+  /* Optimize pow(x,c), where n = 2c for some nonzero integer n, into
+
+       sqrt(x) * powi(x, n/2),                n > 0;
+       1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.
+
+     Do not calculate the powi factor when n/2 = 0.  */
+  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
+  n = real_to_integer (&c2);
+  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+
+  if (flag_unsafe_math_optimizations
+      && sqrtfn
+      && real_identical (&c2, &cint))
+    {
+      tree powi_x_ndiv2 = NULL_TREE;
+      tree sqrt_arg0, result;
+      tree target = NULL_TREE;
+
+      /* Attempt to fold powi(arg0, abs(n/2)) into multiplies.  If not
+         possible or profitable, give up.  Skip the degenerate case when
+         n is 1 or -1, where the result is always 1.  */
+      if (abs (n) != 1)
+	{
+	  powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0, abs(n/2));
+	  if (!powi_x_ndiv2)
+	    return NULL_TREE;
+	}
+
+      /* Calculate sqrt(x).  When n is not 1 or -1, multiply it by the
+	 result of the optimal multiply sequence just calculated.  */
+      sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+      if (abs (n) == 1)
+	result = sqrt_arg0;
+      else
+	result = build_and_insert_binop (gsi, MULT_EXPR, sqrt_arg0,
+					 powi_x_ndiv2, target, loc);
+
+      /* If n is negative, reciprocate the result.  */
+      if (n < 0)
+	result = build_and_insert_binop (gsi, RDIV_EXPR,
+					 build_real (type, dconst1),
+					 result, target, loc);
+      return result;
+    }
+
+  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
+
+     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
+     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
+
+     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
+     different from pow(x, 1./3.) due to rounding and behavior with
+     negative x, we need to constrain this transformation to unsafe
+     math and positive x or finite math.  */
+  cbrtfn = mathfn_built_in (TREE_TYPE (arg0), BUILT_IN_CBRT);
+
+  real_from_integer (&dconst3, VOIDmode, 3, 0, 0);
+  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
+  real_round (&c2, mode, &c2);
+  n = real_to_integer (&c2);
+  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
+  real_convert (&c2, mode, &c2);
+
+  if (flag_unsafe_math_optimizations
+      && cbrtfn
+      /* FIXME: The following line was originally
+	 && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)),
+	 but since arg0 is a gimple value, the first predicate
+	 will always return false.  It needs to be replaced with a
+	 call to a similar gimple_val_nonnegative_p function to be
+         added in gimple-fold.c.  */
+      && !HONOR_NANS (mode)
+      && real_identical (&c2, &c)
+      && optimize_function_for_speed_p (cfun)
+      && powi_cost (n / 3) <= POWI_MAX_MULTS)
+    {
+      tree powi_x_ndiv3 = NULL_TREE;
+      tree cbrt_x, powi_cbrt_x, result;
+      tree target = NULL_TREE;
+
+      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
+         possible or profitable, give up.  Skip the degenerate case when
+         abs(n) < 3, where the result is always 1.  */
+      if (abs (n) >= 3)
+	{
+	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
+						     abs (n / 3));
+	  if (!powi_x_ndiv3)
+	    return NULL_TREE;
+	}
+
+      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
+         as that creates an unnecessary variable.  Instead, just produce
+         either cbrt(x) or cbrt(x) * cbrt(x).  */
+      cbrt_x = build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);
+
+      if (abs (n) % 3 == 1)
+	powi_cbrt_x = cbrt_x;
+      else
+	powi_cbrt_x = build_and_insert_binop (gsi, MULT_EXPR, cbrt_x,
+					      cbrt_x, target, loc);
+
+      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
+      if (abs (n) < 3)
+	result = powi_cbrt_x;
+      else
+	result = build_and_insert_binop (gsi, MULT_EXPR, powi_x_ndiv3,
+					 powi_cbrt_x, target, loc);
+
+      /* If n is negative, reciprocate the result.  */
+      if (n < 0)
+	result = build_and_insert_binop (gsi, RDIV_EXPR, 
+					 build_real (type, dconst1),
+					 result, target, loc);
+
+      return result;
+    }
+
+  /* No optimizations succeeded.  */
+  return NULL_TREE;
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -1065,11 +1220,10 @@  gimple_expand_builtin_pow (gimple_stmt_iterator *g
 {
   REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
   HOST_WIDE_INT n;
-  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, ssa_target;
+  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt;
   tree target = NULL_TREE;
   enum machine_mode mode;
   bool hw_sqrt_exists;
-  gimple mult_stmt;
 
   /* If the exponent isn't a constant, there's nothing of interest
      to be done.  */
@@ -1141,13 +1295,8 @@  gimple_expand_builtin_pow (gimple_stmt_iterator *g
       sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
 
       /* sqrt(x) * sqrt(sqrt(x))  */
-      ssa_target = make_ssa_name (target, NULL);
-      mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
-						sqrt_arg0, sqrt_sqrt);
-      gimple_set_location (mult_stmt, loc);
-      gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
-
-      return ssa_target;
+      return build_and_insert_binop (gsi, MULT_EXPR, sqrt_arg0, sqrt_sqrt,
+				     target, loc);
     }
 
   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
@@ -1197,7 +1346,8 @@  gimple_expand_builtin_pow (gimple_stmt_iterator *g
       return build_and_insert_call (gsi, cbrtfn, sqrt_arg0, &target, loc);
     }
 
-  return NULL_TREE;
+  /* Attempt to optimize various other fractional exponents.  */
+  return gimple_expand_pow_frac_exp (gsi, loc, arg0, c);
 }
 
 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1