diff mbox

Fix PR lto/49302

Message ID 1307552192.4798.55.camel@L3G5336.ibm.com
State New
Headers show

Commit Message

Bill Schmidt June 8, 2011, 4:56 p.m. UTC
This patch adds code to the cse_sincos pass to expand builtin cabs (x)
into sqrt (r*r + i*i), where r = realpart(x) and i = imagpart(x).  This
is usually handled by fold_builtin_cabs; however, PR49302 shows a case
where compile flags of -O0 -flto combined with link flags of -O2
-ffast-math allow cabs to leak through.

Test case gcc.dg/lto/20110201-1_0.c indicates that cabs is not
guaranteed to be implemented in libm, so -lm is not specified for the
test.  This patch does not present a perfect solution, in that it will
produce a builtin call to sqrt for targets that don't have a hardware
sqrt instruction.  Although sqrt should always be provided by libm, this
test case will fail since -lm is not specified.  Note that this is not
new behavior introduced by the patch, so this is probably OK; it just
restores what was happening prior to the pow/powi fixes for PR46728.

The test case has been verified fixed on x86_64-linux, and verified to
fail only with an unsatisfied reference to sqrt on powerpc64-linux.
Bootstrapped and regtested on both platforms.

OK for trunk?

Thanks,
Bill


2011-06-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR lto/49302
	* tree-ssa-math-opts.c (build_and_insert_ref): New.
	(gimple_expand_builtin_cabs): New.
	(execute_cse_sincos): Add case for BUILT_IN_CABS.

Comments

Richard Biener June 9, 2011, 8:55 a.m. UTC | #1
On Wed, Jun 8, 2011 at 6:56 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> This patch adds code to the cse_sincos pass to expand builtin cabs (x)
> into sqrt (r*r + i*i), where r = realpart(x) and i = imagpart(x).  This
> is usually handled by fold_builtin_cabs; however, PR49302 shows a case
> where compile flags of -O0 -flto combined with link flags of -O2
> -ffast-math allow cabs to leak through.
>
> Test case gcc.dg/lto/20110201-1_0.c indicates that cabs is not
> guaranteed to be implemented in libm, so -lm is not specified for the
> test.  This patch does not present a perfect solution, in that it will
> produce a builtin call to sqrt for targets that don't have a hardware
> sqrt instruction.  Although sqrt should always be provided by libm, this
> test case will fail since -lm is not specified.  Note that this is not
> new behavior introduced by the patch, so this is probably OK; it just
> restores what was happening prior to the pow/powi fixes for PR46728.
>
> The test case has been verified fixed on x86_64-linux, and verified to
> fail only with an unsatisfied reference to sqrt on powerpc64-linux.
> Bootstrapped and regtested on both platforms.
>
> OK for trunk?
>
> Thanks,
> Bill
>
>
> 2011-06-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR lto/49302
>        * tree-ssa-math-opts.c (build_and_insert_ref): New.
>        (gimple_expand_builtin_cabs): New.
>        (execute_cse_sincos): Add case for BUILT_IN_CABS.
>
>
> Index: gcc/tree-ssa-math-opts.c
> ===================================================================
> --- gcc/tree-ssa-math-opts.c    (revision 174765)
> +++ gcc/tree-ssa-math-opts.c    (working copy)
> @@ -1070,6 +1070,22 @@ build_and_insert_binop (gimple_stmt_iterator *gsi,
>   return result;
>  }
>
> +/* Build a gimple reference operation with the given CODE and argument
> +   ARG, assigning the result to a new SSA name for variable TARGET.
> +   Insert the statement prior to GSI's current position, and return
> +   the fresh SSA name.*/

Two spaces after '.'

> +
> +static inline tree
> +build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
> +                     tree target, enum tree_code code, tree arg0)
> +{
> +  tree result = make_ssa_name (target, NULL);
> +  gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
> +  gimple_set_location (stmt, loc);
> +  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
> +  return result;
> +}
> +
>  /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
>    with location info LOC.  If possible, create an equivalent and
>    less expensive sequence of statements prior to GSI, and return an
> @@ -1306,6 +1322,41 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
>   return NULL_TREE;
>  }
>
> +/* ARG is the argument to a cabs builtin call in GSI with location info
> +   LOC.  Create a sequence of statements prior to GSI that calculates
> +   sqrt(R*R + I*I), where R and I are the real and imaginary components
> +   of ARG, respectively.  Return an expression holding the result.  */
> +
> +static tree
> +gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
> +{
> +  tree target, real_part, imag_part, addend1, addend2, sum, result, sqrtfn;
> +  tree type = TREE_TYPE (arg);
> +
> +  gcc_assert (TREE_CODE (type) == COMPLEX_TYPE);

No need to assert this.

> +  type = TREE_TYPE (type);
> +  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
> +
> +  if (!sqrtfn)
> +    return NULL_TREE;

I think we should only do this when a HW sqrt is available.

> +
> +  target = create_tmp_var (type, "cabs");
> +  add_referenced_var (target);
> +
> +  real_part = build_and_insert_ref (gsi, loc, type, target,
> +                                   REALPART_EXPR, arg);
> +  addend1 = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
> +                                   real_part, real_part);
> +  imag_part = build_and_insert_ref (gsi, loc, type, target,
> +                                   IMAGPART_EXPR, arg);
> +  addend2 = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
> +                                   imag_part, imag_part);
> +  sum = build_and_insert_binop (gsi, loc, target, PLUS_EXPR, addend1, addend2);
> +  result = build_and_insert_call (gsi, loc, &target, sqrtfn, sum);
> +
> +  return result;
> +}
> +
>  /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
>    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
>    an optimal number of multiplies, when n is a constant.  */
> @@ -1388,6 +1439,34 @@ execute_cse_sincos (void)
>                    }
>                  break;
>
> +               CASE_FLT_FN (BUILT_IN_CABS):
> +                 /* This code is usually redundant with fold_builtin_cabs.
> +                    There is a possible edge case where compile flags
> +                    include -O0 -flto, while link flags include -O2
> +                    -ffast-math.  In this case, the fold does not occur
> +                    and cabs can leak through.  Since it is not guaranteed
> +                    to be implemented in libm on all targets, we need to
> +                    lower it here.
> +
> +                    See test case gcc.dg/lto/20110201-1_0.c.  */

I would omit this comment - it really only holds for the testcase,
which is quite artificial.  Generally we want to move all of the
non-trivial (thus, expanding) foldings from builtins.c to the gimple
level (which is "here") and the non-trivial combining foldings eventually
to tree-ssa-forwprop.c.

> +                 if (!flag_unsafe_math_optimizations
> +                     || !optimize_function_for_speed_p (cfun))
> +                   break;

Please move this check to gimple_expand_builtin_cabs instead.

Btw, we can use a more accurate optimize-for-speed check, namely
optimize_bb_for_speed_p (gimple_bb (stmt)) (eventually add a
convenient optimize_stmt_for_speed_p () to predict.c).

Thanks,
Richard.

> +                 arg0 = gimple_call_arg (stmt, 0);
> +                 loc = gimple_location (stmt);
> +                 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
> +
> +                 if (result)
> +                   {
> +                     tree lhs = gimple_get_lhs (stmt);
> +                     gimple new_stmt = gimple_build_assign (lhs, result);
> +                     gimple_set_location (new_stmt, loc);
> +                     unlink_stmt_vdef (stmt);
> +                     gsi_replace (&gsi, new_stmt, true);
> +                   }
> +                 break;
> +
>                default:;
>                }
>            }
>
>
>
diff mbox

Patch

Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc/tree-ssa-math-opts.c	(revision 174765)
+++ gcc/tree-ssa-math-opts.c	(working copy)
@@ -1070,6 +1070,22 @@  build_and_insert_binop (gimple_stmt_iterator *gsi,
   return result;
 }
 
+/* Build a gimple reference operation of type TYPE with the given CODE
+   and argument ARG0, assigning the result to a new SSA name for variable
+   TARGET.  Insert the statement, with location LOC, prior to GSI's
+   current position, and return the fresh SSA name.  */
+
+static inline tree
+build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
+		      tree target, enum tree_code code, tree arg0)
+{
+  tree result = make_ssa_name (target, NULL);
+  gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
+  gimple_set_location (stmt, loc);
+  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+  return result;
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -1306,6 +1322,41 @@  gimple_expand_builtin_pow (gimple_stmt_iterator *g
   return NULL_TREE;
 }
 
+/* ARG is the complex argument to a cabs builtin call in GSI with location
+   info LOC.  Insert statements prior to GSI computing sqrt(R*R + I*I), R
+   and I being the real and imaginary parts of ARG, and return the result.
+   Return NULL_TREE if no sqrt builtin exists for the component type.  */
+
+static tree
+gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
+{
+  tree target, real_part, imag_part, addend1, addend2, sum, result, sqrtfn;
+  tree type = TREE_TYPE (arg);
+  
+  gcc_assert (TREE_CODE (type) == COMPLEX_TYPE);
+  type = TREE_TYPE (type);
+  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
+
+  if (!sqrtfn)
+    return NULL_TREE;
+
+  target = create_tmp_var (type, "cabs");
+  add_referenced_var (target);
+
+  real_part = build_and_insert_ref (gsi, loc, type, target,
+				    REALPART_EXPR, arg);
+  addend1 = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
+				    real_part, real_part);
+  imag_part = build_and_insert_ref (gsi, loc, type, target, 
+				    IMAGPART_EXPR, arg);
+  addend2 = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
+				    imag_part, imag_part);
+  sum = build_and_insert_binop (gsi, loc, target, PLUS_EXPR, addend1, addend2);
+  result = build_and_insert_call (gsi, loc, &target, sqrtfn, sum);
+
+  return result;
+}
+
 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
    an optimal number of multiplies, when n is a constant.  */
@@ -1388,6 +1439,34 @@  execute_cse_sincos (void)
 		    }
 		  break;
 
+		CASE_FLT_FN (BUILT_IN_CABS):
+		  /* This code is usually redundant with fold_builtin_cabs.
+		     There is a possible edge case where compile flags
+		     include -O0 -flto, while link flags include -O2
+		     -ffast-math.  In this case, the fold does not occur
+		     and cabs can leak through.  Since it is not guaranteed
+		     to be implemented in libm on all targets, we need to
+		     lower it here.
+
+		     See test case gcc.dg/lto/20110201-1_0.c.  */
+		  if (!flag_unsafe_math_optimizations
+		      || !optimize_function_for_speed_p (cfun))
+		    break;
+
+		  arg0 = gimple_call_arg (stmt, 0);
+		  loc = gimple_location (stmt);
+		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
+
+		  if (result)
+		    {
+		      tree lhs = gimple_get_lhs (stmt);
+		      gimple new_stmt = gimple_build_assign (lhs, result);
+		      gimple_set_location (new_stmt, loc);
+		      unlink_stmt_vdef (stmt);
+		      gsi_replace (&gsi, new_stmt, true);
+		    }
+		  break;
+
 		default:;
 		}
 	    }