diff mbox

Move cabs simplifications to match.pd

Message ID 87mvvezzqo.fsf@e105548-lin.cambridge.arm.com
State New
Headers show

Commit Message

Richard Sandiford Oct. 19, 2015, 2:42 p.m. UTC
The fold code also expanded cabs(x+yi) to fsqrt(x*x+y*y) when optimising
for speed.  tree-ssa-math-opts.c has this transformation too, but unlike
the fold code, it first checks whether the target implements the sqrt
optab.  The patch simply removes the fold code and keeps the
tree-ssa-math-opts.c logic the same.

gcc.dg/lto/20110201-1_0.c was relying on us replacing cabs
with fsqrt even on targets where fsqrt is itself a library call.
The discussion leading up to that patch suggested that we only
want to test the fold on targets with a square root instruction,
so it would be OK to skip the test on other targets:

    https://gcc.gnu.org/ml/gcc-patches/2011-07/msg01961.html
    https://gcc.gnu.org/ml/gcc-patches/2011-07/msg02036.html

The patch does that using the sqrt_insn effective target.

It's possible that removing the tree folds renders the LTO trick
unnecessary, but since the test was originally for an ICE, it seems
better to leave it as-is.

Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi.
20110201-1_0.c passes on all three.  OK to install?

Thanks,
Richard


gcc/
	* builtins.c (fold_builtin_cabs): Delete.
	(fold_builtin_1): Update accordingly.  Handle constant arguments here.
	* match.pd: Add rules previously handled by fold_builtin_cabs.

gcc/testsuite/
	* gcc.dg/lto/20110201-1_0.c: Restrict to sqrt_insn targets.
	Add associated options for arm*-*-*.
	(sqrt): Remove dummy definition.

Comments

Richard Biener Oct. 19, 2015, 5:15 p.m. UTC | #1
On October 19, 2015 4:42:23 PM GMT+02:00, Richard Sandiford <richard.sandiford@arm.com> wrote:
>The fold code also expanded cabs(x+yi) to fsqrt(x*x+y*y) when
>optimising
>for speed.  tree-ssa-math-opts.c has this transformation too, but
>unlike
>the fold code, it first checks whether the target implements the sqrt
>optab.  The patch simply removes the fold code and keeps the
>tree-ssa-math-opts.c logic the same.
>
>gcc.dg/lto/20110201-1_0.c was relying on us replacing cabs
>with fsqrt even on targets where fsqrt is itself a library call.
>The discussion leading up to that patch suggested that we only
>want to test the fold on targets with a square root instruction,
>so it would be OK to skip the test on other targets:
>
>    https://gcc.gnu.org/ml/gcc-patches/2011-07/msg01961.html
>    https://gcc.gnu.org/ml/gcc-patches/2011-07/msg02036.html
>
>The patch does that using the sqrt_insn effective target.
>
>It's possible that removing the tree folds renders the LTO trick
>unnecessary, but since the test was originally for an ICE, it seems
>better to leave it as-is.
>
>Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi.
>20110201-1_0.c passes on all three.  OK to install?

OK.

Thanks,
Richard.

>Thanks,
>Richard
>
>
>gcc/
>	* builtins.c (fold_builtin_cabs): Delete.
>	(fold_builtin_1): Update accordingly.  Handle constant arguments here.
>	* match.pd: Add rules previously handled by fold_builtin_cabs.
>
>gcc/testsuite/
>	* gcc.dg/lto/20110201-1_0.c: Restrict to sqrt_insn targets.
>	Add associated options for arm*-*-*.
>	(sqrt): Remove dummy definition.
>
>diff --git a/gcc/builtins.c b/gcc/builtins.c
>index 1e4ec35..8f87fd9 100644
>--- a/gcc/builtins.c
>+++ b/gcc/builtins.c
>@@ -7539,82 +7539,6 @@ fold_fixed_mathfn (location_t loc, tree fndecl,
>tree arg)
>   return NULL_TREE;
> }
> 
>-/* Fold call to builtin cabs, cabsf or cabsl with argument ARG.  TYPE
>is the
>-   return type.  Return NULL_TREE if no simplification can be made. 
>*/
>-
>-static tree
>-fold_builtin_cabs (location_t loc, tree arg, tree type, tree fndecl)
>-{
>-  tree res;
>-
>-  if (!validate_arg (arg, COMPLEX_TYPE)
>-      || TREE_CODE (TREE_TYPE (TREE_TYPE (arg))) != REAL_TYPE)
>-    return NULL_TREE;
>-
>-  /* Calculate the result when the argument is a constant.  */
>-  if (TREE_CODE (arg) == COMPLEX_CST
>-      && (res = do_mpfr_arg2 (TREE_REALPART (arg), TREE_IMAGPART
>(arg),
>-			      type, mpfr_hypot)))
>-    return res;
>-
>-  if (TREE_CODE (arg) == COMPLEX_EXPR)
>-    {
>-      tree real = TREE_OPERAND (arg, 0);
>-      tree imag = TREE_OPERAND (arg, 1);
>-
>-      /* If either part is zero, cabs is fabs of the other.  */
>-      if (real_zerop (real))
>-	return fold_build1_loc (loc, ABS_EXPR, type, imag);
>-      if (real_zerop (imag))
>-	return fold_build1_loc (loc, ABS_EXPR, type, real);
>-
>-      /* cabs(x+xi) -> fabs(x)*sqrt(2).  */
>-      if (flag_unsafe_math_optimizations
>-	  && operand_equal_p (real, imag, OEP_PURE_SAME))
>-        {
>-	  STRIP_NOPS (real);
>-	  return fold_build2_loc (loc, MULT_EXPR, type,
>-				  fold_build1_loc (loc, ABS_EXPR, type, real),
>-				  build_real_truncate (type, dconst_sqrt2 ()));
>-	}
>-    }
>-
>-  /* Optimize cabs(-z) and cabs(conj(z)) as cabs(z).  */
>-  if (TREE_CODE (arg) == NEGATE_EXPR
>-      || TREE_CODE (arg) == CONJ_EXPR)
>-    return build_call_expr_loc (loc, fndecl, 1, TREE_OPERAND (arg,
>0));
>-
>-  /* Don't do this when optimizing for size.  */
>-  if (flag_unsafe_math_optimizations
>-      && optimize && optimize_function_for_speed_p (cfun))
>-    {
>-      tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
>-
>-      if (sqrtfn != NULL_TREE)
>-	{
>-	  tree rpart, ipart, result;
>-
>-	  arg = builtin_save_expr (arg);
>-
>-	  rpart = fold_build1_loc (loc, REALPART_EXPR, type, arg);
>-	  ipart = fold_build1_loc (loc, IMAGPART_EXPR, type, arg);
>-
>-	  rpart = builtin_save_expr (rpart);
>-	  ipart = builtin_save_expr (ipart);
>-
>-	  result = fold_build2_loc (loc, PLUS_EXPR, type,
>-				fold_build2_loc (loc, MULT_EXPR, type,
>-					     rpart, rpart),
>-				fold_build2_loc (loc, MULT_EXPR, type,
>-					     ipart, ipart));
>-
>-	  return build_call_expr_loc (loc, sqrtfn, 1, result);
>-	}
>-    }
>-
>-  return NULL_TREE;
>-}
>-
> /* Build a complex (inf +- 0i) for the result of cproj.  TYPE is the
>    complex tree type of the result.  If NEG is true, the imaginary
>    zero is negative.  */
>@@ -9683,7 +9607,11 @@ fold_builtin_1 (location_t loc, tree fndecl,
>tree arg0)
>     break;
> 
>     CASE_FLT_FN (BUILT_IN_CABS):
>-      return fold_builtin_cabs (loc, arg0, type, fndecl);
>+      if (TREE_CODE (arg0) == COMPLEX_CST
>+	  && TREE_CODE (TREE_TYPE (TREE_TYPE (arg0))) == REAL_TYPE)
>+        return do_mpfr_arg2 (TREE_REALPART (arg0), TREE_IMAGPART
>(arg0),
>+			     type, mpfr_hypot);
>+      break;
> 
>     CASE_FLT_FN (BUILT_IN_CARG):
>       return fold_builtin_carg (loc, arg0, type);
>diff --git a/gcc/match.pd b/gcc/match.pd
>index d677e69..55687c3 100644
>--- a/gcc/match.pd
>+++ b/gcc/match.pd
>@@ -67,6 +67,7 @@ along with GCC; see the file COPYING3.  If not see
> (define_operator_list COPYSIGN BUILT_IN_COPYSIGNF
> 			       BUILT_IN_COPYSIGN
> 			       BUILT_IN_COPYSIGNL)
>+(define_operator_list CABS BUILT_IN_CABSF BUILT_IN_CABS
>BUILT_IN_CABSL)
> 
> /* Simplifications of operations with one constant operand and
>    simplifications to constants or single values.  */
>@@ -392,6 +393,13 @@ along with GCC; see the file COPYING3.  If not see
>   (ccoss (negate @0))
>    (ccoss @0)))
> 
>+/* cabs(-x) and cabs(conj(x)) -> cabs(x).  */
>+(for ops (conj negate)
>+ (for cabss (CABS)
>+  (simplify
>+   (cabss (ops @0))
>+   (cabss @0))))
>+
> /* X % Y is smaller than Y.  */
> (for cmp (lt ge)
>  (simplify
>@@ -2336,6 +2344,11 @@ along with GCC; see the file COPYING3.  If not
>see
>    (cbrts (exps @0))
>   (exps (mult @0 { build_real_truncate (type, dconst_third ()); })))))
> 
>+/* cabs(x+0i) or cabs(0+xi) -> abs(x).  */
>+(simplify
>+ (CABS (complex:c @0 real_zerop@1))
>+ (abs @0))
>+
>/* Canonicalization of sequences of math builtins.  These rules
>represent
>    IL simplifications but are not necessarily optimizations.
> 
>@@ -2427,7 +2440,12 @@ along with GCC; see the file COPYING3.  If not
>see
>   /* cbrt(pow(x,y)) -> pow(x,y/3), iff x is nonnegative.  */
>   (simplify
>    (cbrts (pows tree_expr_nonnegative_p@0 @1))
>-   (pows @0 (mult @1 { build_real_truncate (type, dconst_third ());
>})))))
>+   (pows @0 (mult @1 { build_real_truncate (type, dconst_third ());
>}))))
>+
>+ /* cabs(x+xi) -> fabs(x)*sqrt(2).  */
>+ (simplify
>+  (CABS (complex @0 @0))
>+  (mult (abs @0) { build_real_truncate (type, dconst_sqrt2 ()); })))
> 
> /* Narrowing of arithmetic and logical operations. 
> 
>diff --git a/gcc/testsuite/gcc.dg/lto/20110201-1_0.c
>b/gcc/testsuite/gcc.dg/lto/20110201-1_0.c
>index 5073a50..068dddc 100644
>--- a/gcc/testsuite/gcc.dg/lto/20110201-1_0.c
>+++ b/gcc/testsuite/gcc.dg/lto/20110201-1_0.c
>@@ -1,6 +1,8 @@
> /* { dg-lto-do run } */
> /* { dg-lto-options { { -O0 -flto } } } */
>+/* { dg-lto-options { "-O0 -flto -mfloat-abi=softfp -mfpu=neon-vfpv4"
>} { target arm*-*-* } } */
> /* { dg-require-linker-plugin "" } */
>+/* { dg-require-effective-target sqrt_insn } */
> 
> /* We require a linker plugin because otherwise we'd need to link
>    against libm which we are not sure here has cabs on all targets.
>@@ -16,13 +18,4 @@ foo (_Complex double x, int b)
>   return cabs(x);
> }
> 
>-/* We provide a dummy sqrt to avoid link failures on targets that do
>not
>-   expand sqrt inline.  Note that we do not link against libm in order
>-   to ensure cabs is not satisfied by the library, but must be folded.
> */
>-double __attribute__((used))
>-sqrt (double x)
>-{
>-  return x;
>-}
>-
> int main() { return 0; }
diff mbox

Patch

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 1e4ec35..8f87fd9 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -7539,82 +7539,6 @@  fold_fixed_mathfn (location_t loc, tree fndecl, tree arg)
   return NULL_TREE;
 }
 
-/* Fold call to builtin cabs, cabsf or cabsl with argument ARG.  TYPE is the
-   return type.  Return NULL_TREE if no simplification can be made.  */
-
-static tree
-fold_builtin_cabs (location_t loc, tree arg, tree type, tree fndecl)
-{
-  tree res;
-
-  if (!validate_arg (arg, COMPLEX_TYPE)
-      || TREE_CODE (TREE_TYPE (TREE_TYPE (arg))) != REAL_TYPE)
-    return NULL_TREE;
-
-  /* Calculate the result when the argument is a constant.  */
-  if (TREE_CODE (arg) == COMPLEX_CST
-      && (res = do_mpfr_arg2 (TREE_REALPART (arg), TREE_IMAGPART (arg),
-			      type, mpfr_hypot)))
-    return res;
-
-  if (TREE_CODE (arg) == COMPLEX_EXPR)
-    {
-      tree real = TREE_OPERAND (arg, 0);
-      tree imag = TREE_OPERAND (arg, 1);
-
-      /* If either part is zero, cabs is fabs of the other.  */
-      if (real_zerop (real))
-	return fold_build1_loc (loc, ABS_EXPR, type, imag);
-      if (real_zerop (imag))
-	return fold_build1_loc (loc, ABS_EXPR, type, real);
-
-      /* cabs(x+xi) -> fabs(x)*sqrt(2).  */
-      if (flag_unsafe_math_optimizations
-	  && operand_equal_p (real, imag, OEP_PURE_SAME))
-        {
-	  STRIP_NOPS (real);
-	  return fold_build2_loc (loc, MULT_EXPR, type,
-				  fold_build1_loc (loc, ABS_EXPR, type, real),
-				  build_real_truncate (type, dconst_sqrt2 ()));
-	}
-    }
-
-  /* Optimize cabs(-z) and cabs(conj(z)) as cabs(z).  */
-  if (TREE_CODE (arg) == NEGATE_EXPR
-      || TREE_CODE (arg) == CONJ_EXPR)
-    return build_call_expr_loc (loc, fndecl, 1, TREE_OPERAND (arg, 0));
-
-  /* Don't do this when optimizing for size.  */
-  if (flag_unsafe_math_optimizations
-      && optimize && optimize_function_for_speed_p (cfun))
-    {
-      tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
-
-      if (sqrtfn != NULL_TREE)
-	{
-	  tree rpart, ipart, result;
-
-	  arg = builtin_save_expr (arg);
-
-	  rpart = fold_build1_loc (loc, REALPART_EXPR, type, arg);
-	  ipart = fold_build1_loc (loc, IMAGPART_EXPR, type, arg);
-
-	  rpart = builtin_save_expr (rpart);
-	  ipart = builtin_save_expr (ipart);
-
-	  result = fold_build2_loc (loc, PLUS_EXPR, type,
-				fold_build2_loc (loc, MULT_EXPR, type,
-					     rpart, rpart),
-				fold_build2_loc (loc, MULT_EXPR, type,
-					     ipart, ipart));
-
-	  return build_call_expr_loc (loc, sqrtfn, 1, result);
-	}
-    }
-
-  return NULL_TREE;
-}
-
 /* Build a complex (inf +- 0i) for the result of cproj.  TYPE is the
    complex tree type of the result.  If NEG is true, the imaginary
    zero is negative.  */
@@ -9683,7 +9607,11 @@  fold_builtin_1 (location_t loc, tree fndecl, tree arg0)
     break;
 
     CASE_FLT_FN (BUILT_IN_CABS):
-      return fold_builtin_cabs (loc, arg0, type, fndecl);
+      if (TREE_CODE (arg0) == COMPLEX_CST
+	  && TREE_CODE (TREE_TYPE (TREE_TYPE (arg0))) == REAL_TYPE)
+        return do_mpfr_arg2 (TREE_REALPART (arg0), TREE_IMAGPART (arg0),
+			     type, mpfr_hypot);
+      break;
 
     CASE_FLT_FN (BUILT_IN_CARG):
       return fold_builtin_carg (loc, arg0, type);
diff --git a/gcc/match.pd b/gcc/match.pd
index d677e69..55687c3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -67,6 +67,7 @@  along with GCC; see the file COPYING3.  If not see
 (define_operator_list COPYSIGN BUILT_IN_COPYSIGNF
 			       BUILT_IN_COPYSIGN
 			       BUILT_IN_COPYSIGNL)
+(define_operator_list CABS BUILT_IN_CABSF BUILT_IN_CABS BUILT_IN_CABSL)
 
 /* Simplifications of operations with one constant operand and
    simplifications to constants or single values.  */
@@ -392,6 +393,13 @@  along with GCC; see the file COPYING3.  If not see
   (ccoss (negate @0))
    (ccoss @0)))
 
+/* cabs(-x) and cabs(conj(x)) -> cabs(x).  */
+(for ops (conj negate)
+ (for cabss (CABS)
+  (simplify
+   (cabss (ops @0))
+   (cabss @0))))
+
 /* X % Y is smaller than Y.  */
 (for cmp (lt ge)
  (simplify
@@ -2336,6 +2344,11 @@  along with GCC; see the file COPYING3.  If not see
    (cbrts (exps @0))
    (exps (mult @0 { build_real_truncate (type, dconst_third ()); })))))
 
+/* cabs(x+0i) or cabs(0+xi) -> abs(x).  */
+(simplify
+ (CABS (complex:c @0 real_zerop@1))
+ (abs @0))
+
 /* Canonicalization of sequences of math builtins.  These rules represent
    IL simplifications but are not necessarily optimizations.
 
@@ -2427,7 +2440,12 @@  along with GCC; see the file COPYING3.  If not see
   /* cbrt(pow(x,y)) -> pow(x,y/3), iff x is nonnegative.  */
   (simplify
    (cbrts (pows tree_expr_nonnegative_p@0 @1))
-   (pows @0 (mult @1 { build_real_truncate (type, dconst_third ()); })))))
+   (pows @0 (mult @1 { build_real_truncate (type, dconst_third ()); }))))
+
+ /* cabs(x+xi) -> fabs(x)*sqrt(2).  */
+ (simplify
+  (CABS (complex @0 @0))
+  (mult (abs @0) { build_real_truncate (type, dconst_sqrt2 ()); })))
 
 /* Narrowing of arithmetic and logical operations. 
 
diff --git a/gcc/testsuite/gcc.dg/lto/20110201-1_0.c b/gcc/testsuite/gcc.dg/lto/20110201-1_0.c
index 5073a50..068dddc 100644
--- a/gcc/testsuite/gcc.dg/lto/20110201-1_0.c
+++ b/gcc/testsuite/gcc.dg/lto/20110201-1_0.c
@@ -1,6 +1,8 @@ 
 /* { dg-lto-do run } */
 /* { dg-lto-options { { -O0 -flto } } } */
+/* { dg-lto-options { "-O0 -flto -mfloat-abi=softfp -mfpu=neon-vfpv4" } { target arm*-*-* } } */
 /* { dg-require-linker-plugin "" } */
+/* { dg-require-effective-target sqrt_insn } */
 
 /* We require a linker plugin because otherwise we'd need to link
    against libm which we are not sure here has cabs on all targets.
@@ -16,13 +18,4 @@  foo (_Complex double x, int b)
   return cabs(x);
 }
 
-/* We provide a dummy sqrt to avoid link failures on targets that do not
-   expand sqrt inline.  Note that we do not link against libm in order
-   to ensure cabs is not satisfied by the library, but must be folded.  */
-double __attribute__((used))
-sqrt (double x)
-{
-  return x;
-}
-
 int main() { return 0; }