diff mbox

Optimize const1 * copysign (const2, y) in reassoc (PR tree-optimization/67815)

Message ID 20151013162849.GF13672@redhat.com
State New
Headers show

Commit Message

Marek Polacek Oct. 13, 2015, 4:28 p.m. UTC
This patch implements the copysign optimization for reassoc I promised
I'd look into.  I.e.,

CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0
CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0

After getting familiar with reassoc a bit this wasn't that hard.  But
I'm hopeless when it comes to floating-point stuff, so I'd appreciate
if you could glance over the tests.  The reassoc-40.c test should address
Joseph's comment in the audit trail (with -fno-rounding-math the
optimization would take place).

For 0.0 * copysign (cst, x), the result is folded into 0.0 way before
reassoc, so we probably don't have to pay attention to this case.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-10-13  Marek Polacek  <polacek@redhat.com>

	PR tree-optimization/67815
	* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
	(reassociate_bb): Call it.

	* gcc.dg/tree-ssa/reassoc-39.c: New test.
	* gcc.dg/tree-ssa/reassoc-40.c: New test.


	Marek

Comments

Richard Biener Oct. 14, 2015, 9:10 a.m. UTC | #1
On Tue, 13 Oct 2015, Marek Polacek wrote:

> This patch implements the copysign optimization for reassoc I promised
> I'd look into.  I.e.,
> 
> CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0
> CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0
> 
> After getting familiar with reassoc a bit this wasn't that hard.  But
> I'm hopeless when it comes to floating-point stuff, so I'd appreciate
> if you could glance over the tests.  The reassoc-40.c should address
> Joseph's comment in the audit trail (with -fno-rounding-math the
> optimization would take place).
> 
> For 0.0 * copysign (cst, x), the result is folded into 0.0 way before
> reassoc, so we probably don't have to pay attention to this case.
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> 2015-10-13  Marek Polacek  <polacek@redhat.com>
> 
> 	PR tree-optimization/67815
> 	* tree-ssa-reassoc.c (attempt_builtin_copysign): New function.
> 	(reassociate_bb): Call it.
> 
> 	* gcc.dg/tree-ssa/reassoc-39.c: New test.
> 	* gcc.dg/tree-ssa/reassoc-40.c: New test.
> 
> diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> index e69de29..589d06b 100644
> --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> +++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
> @@ -0,0 +1,41 @@
> +/* PR tree-optimization/67815 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
> +
> +float
> +f0 (float x)
> +{
> +  return 7.5 * __builtin_copysignf (2.0, x);
> +}
> +
> +float
> +f1 (float x)
> +{
> +  return -7.5 * __builtin_copysignf (2.0, x);
> +}
> +
> +double
> +f2 (double x, double y)
> +{
> +  return x * ((1.0/12) * __builtin_copysign (1.0, y));
> +}
> +
> +double
> +f3 (double x, double y)
> +{
> +  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
> +}
> +
> +double
> +f4 (double x, double y, double z)
> +{
> +  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
> +}
> +
> +double
> +f5 (double x, double y, double z)
> +{
> +  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1"} }*/
> diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> index e69de29..d65bcc1b 100644
> --- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> +++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
> @@ -0,0 +1,21 @@
> +/* PR tree-optimization/67815 */
> +/* { dg-do compile } */
> +/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
> +
> +/* Test that the copysign reassoc optimization doesn't fire for
> +   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
> +   is inexact.  */
> +
> +double
> +f1 (double y)
> +{
> +  return (1.2 * __builtin_copysign (1.1, y));
> +}
> +
> +double
> +f2 (double y)
> +{
> +  return (-1.2 * __builtin_copysign (1.1, y));
> +}
> +
> +/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
> diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
> index 879722e..b8897b7 100644
> --- gcc/tree-ssa-reassoc.c
> +++ gcc/tree-ssa-reassoc.c
> @@ -4622,6 +4622,95 @@ attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
>    return result;
>  }
>  
> +/* Attempt to optimize
> +   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
> +   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
> +
> +static void
> +attempt_builtin_copysign (vec<operand_entry *> *ops)
> +{
> +  operand_entry *oe;
> +  unsigned int i;
> +  unsigned int length = ops->length ();
> +  tree cst1 = ops->last ()->op;
> +
> +  if (length == 1 || TREE_CODE (cst1) != REAL_CST)
> +    return;
> +
> +  FOR_EACH_VEC_ELT (*ops, i, oe)
> +    {
> +      if (TREE_CODE (oe->op) == SSA_NAME)

I think you need to check whether the SSA_NAME has a single use only
as you are changing its value.  Which also means you shouldn't be
"reusing" it (because existing debug stmts will then be wrong).
Thus you have to replace it.

> +	{
> +	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
> +	  if (is_gimple_call (def_stmt))
> +	    {
> +	      tree fndecl = gimple_call_fndecl (def_stmt);
> +	      tree cst2;
> +	      switch (DECL_FUNCTION_CODE (fndecl))
> +		{
> +		CASE_FLT_FN (BUILT_IN_COPYSIGN):
> +		  cst2 = gimple_call_arg (def_stmt, 0);
> +		  /* The first argument of copysign must be a constant,
> +		     otherwise there's nothing to do.  */
> +		  if (TREE_CODE (cst2) == REAL_CST)
> +		    {
> +		      tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst1),
> +					      cst1, cst2);
> +		      /* If we couldn't fold to a single constant, skip it.  */
> +		      if (mul == NULL_TREE)
> +			break;
> +		      /* We're going to replace the copysign argument with
> +			 the multiplication product.  Remove the constant.  */
> +		      ops->pop ();
> +		      gimple_call_set_arg (def_stmt, 0, mul);
> +		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst1));
> +		      /* Handle the CST1 < 0 case -- negate the result.  */
> +		      if (cst1_neg)
> +			{
> +			  tree lhs = gimple_call_lhs (def_stmt);
> +			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
> +			  gimple *negate_stmt
> +			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
> +			  insert_stmt_after (negate_stmt, def_stmt);
> +			  ops->ordered_remove (i);
> +			  add_to_ops_vec (ops, negrhs);

Why use ordered remove and add_to_ops_vec here?  Just replace the entry?

Thanks,
Richard.

> +			}
> +		      if (dump_file && (dump_flags & TDF_DETAILS))
> +			{
> +			  fprintf (dump_file, "Optimizing copysign: ");
> +			  print_generic_expr (dump_file, cst1, 0);
> +			  fprintf (dump_file, " * ");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_fn (def_stmt), 0);
> +			  fprintf (dump_file, " (");
> +			  print_generic_expr (dump_file, cst2, 0);
> +			  fprintf (dump_file, ", ");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_arg (def_stmt, 1),
> +					      0);
> +			  fprintf (dump_file, ") into %s",
> +				   cst1_neg ? "-" : "");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_fn (def_stmt), 0);
> +			  fprintf (dump_file, " (");
> +			  print_generic_expr (dump_file, mul, 0);
> +			  fprintf (dump_file, ", ");
> +			  print_generic_expr (dump_file,
> +					      gimple_call_arg (def_stmt, 1),
> +					      0);
> +			  fprintf (dump_file, "\n");
> +			}
> +		      return;
> +		    }
> +		  break;
> +		default:
> +		  break;
> +		}
> +	    }
> +	}
> +    }
> +}
> +
>  /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
>  
>  static void
> @@ -4764,6 +4853,9 @@ reassociate_bb (basic_block bb)
>  	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
>  		optimize_range_tests (rhs_code, &ops);
>  
> +	      if (rhs_code == MULT_EXPR)
> +		attempt_builtin_copysign (&ops);
> +
>  	      if (first_pass_instance
>  		  && rhs_code == MULT_EXPR
>  		  && flag_unsafe_math_optimizations)
> 
> 	Marek
> 
>
diff mbox

Patch

diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
index e69de29..589d06b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-39.c
@@ -0,0 +1,41 @@ 
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-reassoc1-details" } */
+
+float
+f0 (float x)
+{
+  return 7.5 * __builtin_copysignf (2.0, x);
+}
+
+float
+f1 (float x)
+{
+  return -7.5 * __builtin_copysignf (2.0, x);
+}
+
+double
+f2 (double x, double y)
+{
+  return x * ((1.0/12) * __builtin_copysign (1.0, y));
+}
+
+double
+f3 (double x, double y)
+{
+  return (x * (-1.0/12)) * __builtin_copysign (1.0, y);
+}
+
+double
+f4 (double x, double y, double z)
+{
+  return (x * z) * ((1.0/12) * __builtin_copysign (4.0, y));
+}
+
+double
+f5 (double x, double y, double z)
+{
+  return (x * (-1.0/12)) * z * __builtin_copysign (2.0, y);
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing copysign" 6 "reassoc1"} }*/
diff --git gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
index e69de29..d65bcc1b 100644
--- gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
+++ gcc/testsuite/gcc.dg/tree-ssa/reassoc-40.c
@@ -0,0 +1,21 @@ 
+/* PR tree-optimization/67815 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -frounding-math -fdump-tree-reassoc1-details" } */
+
+/* Test that the copysign reassoc optimization doesn't fire for
+   -frounding-math (i.e. HONOR_SIGN_DEPENDENT_ROUNDING) if the multiplication
+   is inexact.  */
+
+double
+f1 (double y)
+{
+  return (1.2 * __builtin_copysign (1.1, y));
+}
+
+double
+f2 (double y)
+{
+  return (-1.2 * __builtin_copysign (1.1, y));
+}
+
+/* { dg-final { scan-tree-dump-not "Optimizing copysign" "reassoc1" } } */
diff --git gcc/tree-ssa-reassoc.c gcc/tree-ssa-reassoc.c
index 879722e..b8897b7 100644
--- gcc/tree-ssa-reassoc.c
+++ gcc/tree-ssa-reassoc.c
@@ -4622,6 +4622,95 @@  attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
   return result;
 }
 
+/* Attempt to optimize
+   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
+   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
+
+static void
+attempt_builtin_copysign (vec<operand_entry *> *ops)
+{
+  operand_entry *oe;
+  unsigned int i;
+  unsigned int length = ops->length ();
+  tree cst1 = ops->last ()->op;
+
+  if (length == 1 || TREE_CODE (cst1) != REAL_CST)
+    return;
+
+  FOR_EACH_VEC_ELT (*ops, i, oe)
+    {
+      if (TREE_CODE (oe->op) == SSA_NAME)
+	{
+	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
+	  if (is_gimple_call (def_stmt))
+	    {
+	      tree fndecl = gimple_call_fndecl (def_stmt);
+	      tree cst2;
+	      switch (DECL_FUNCTION_CODE (fndecl))
+		{
+		CASE_FLT_FN (BUILT_IN_COPYSIGN):
+		  cst2 = gimple_call_arg (def_stmt, 0);
+		  /* The first argument of copysign must be a constant,
+		     otherwise there's nothing to do.  */
+		  if (TREE_CODE (cst2) == REAL_CST)
+		    {
+		      tree mul = const_binop (MULT_EXPR, TREE_TYPE (cst1),
+					      cst1, cst2);
+		      /* If we couldn't fold to a single constant, skip it.  */
+		      if (mul == NULL_TREE)
+			break;
+		      /* We're going to replace the copysign argument with
+			 the multiplication product.  Remove the constant.  */
+		      ops->pop ();
+		      gimple_call_set_arg (def_stmt, 0, mul);
+		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst1));
+		      /* Handle the CST1 < 0 case -- negate the result.  */
+		      if (cst1_neg)
+			{
+			  tree lhs = gimple_call_lhs (def_stmt);
+			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
+			  gimple *negate_stmt
+			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
+			  insert_stmt_after (negate_stmt, def_stmt);
+			  ops->ordered_remove (i);
+			  add_to_ops_vec (ops, negrhs);
+			}
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			{
+			  fprintf (dump_file, "Optimizing copysign: ");
+			  print_generic_expr (dump_file, cst1, 0);
+			  fprintf (dump_file, " * ");
+			  print_generic_expr (dump_file,
+					      gimple_call_fn (def_stmt), 0);
+			  fprintf (dump_file, " (");
+			  print_generic_expr (dump_file, cst2, 0);
+			  fprintf (dump_file, ", ");
+			  print_generic_expr (dump_file,
+					      gimple_call_arg (def_stmt, 1),
+					      0);
+			  fprintf (dump_file, ") into %s",
+				   cst1_neg ? "-" : "");
+			  print_generic_expr (dump_file,
+					      gimple_call_fn (def_stmt), 0);
+			  fprintf (dump_file, " (");
+			  print_generic_expr (dump_file, mul, 0);
+			  fprintf (dump_file, ", ");
+			  print_generic_expr (dump_file,
+					      gimple_call_arg (def_stmt, 1),
+					      0);
+			  fprintf (dump_file, "\n");
+			}
+		      return;
+		    }
+		  break;
+		default:
+		  break;
+		}
+	    }
+	}
+    }
+}
+
 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */
 
 static void
@@ -4764,6 +4853,9 @@  reassociate_bb (basic_block bb)
 	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
 		optimize_range_tests (rhs_code, &ops);
 
+	      if (rhs_code == MULT_EXPR)
+		attempt_builtin_copysign (&ops);
+
 	      if (first_pass_instance
 		  && rhs_code == MULT_EXPR
 		  && flag_unsafe_math_optimizations)