diff mbox series

middle-end: Don't apply copysign optimization if target does not implement optab [PR112468]

Message ID patch-18123-tamar@arm.com
State New
Headers show
Series middle-end: Don't apply copysign optimization if target does not implement optab [PR112468] | expand

Commit Message

Tamar Christina Jan. 4, 2024, 6:20 p.m. UTC
Hi All,

Currently GCC does not treat IFN_COPYSIGN the same as the copysign tree expr.
The latter has a libcall fallback, whereas the IFN can only be expanded through
an optab.

Because of this, the change I made to optimize copysign only works if the
target has implemented the optab, but it should work for those that have the
libcall too.

More annoyingly if a target has vector versions of ABS and NEG but not COPYSIGN
then the change made them lose vectorization.

The proper fix for this is to treat the IFN the same as the tree EXPR and to
enhance expand_COPYSIGN to also support vector calls.

I have such a patch for GCC 15 but it's quite big and too invasive for stage-4.
As such this is a minimal fix: just don't apply the transformation, leaving
targets which don't have the optab unoptimized.

The target list for check_effective_target_ifn_copysign was obtained by
grepping for copysign and looking at the optab.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Tests ran in x86_64-pc-linux-gnu -m64/-m32 and tests no longer fail.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/112468
	* doc/sourcebuild.texi: Document ifn_copysign.
	* match.pd: Only apply transformation if target supports the IFN.

gcc/testsuite/ChangeLog:

	PR tree-optimization/112468
	* gcc.dg/fold-copysign-1.c: Modify tests based on if target supports
	IFN_COPYSIGN.
	* gcc.dg/pr55152-2.c: Likewise.
	* gcc.dg/tree-ssa/abs-4.c: Likewise.
	* gcc.dg/tree-ssa/backprop-6.c: Likewise.
	* gcc.dg/tree-ssa/copy-sign-2.c: Likewise.
	* gcc.dg/tree-ssa/mult-abs-2.c: Likewise.
	* lib/target-supports.exp (check_effective_target_ifn_copysign): New.

--- inline copy of patch -- 
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 4be67daedb20d394857c02739389cabf23c0d533..f4847dafe65cbbf8c9de34905f614ef6957658b4 100644




--
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 4be67daedb20d394857c02739389cabf23c0d533..f4847dafe65cbbf8c9de34905f614ef6957658b4 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2664,6 +2664,10 @@ Target requires a command line argument to enable a SIMD instruction set.
 @item xorsign
 Target supports the xorsign optab expansion.
 
+@item ifn_copysign
+Target supports the IFN_COPYSIGN optab expansion for both scalar and vector
+types.
+
 @end table
 
 @subsubsection Environment attributes
diff --git a/gcc/match.pd b/gcc/match.pd
index d57e29bfe1d68afd4df4dda20fecc2405ff05332..87d13e7e3e1aa6d89119142b614890dc4729b521 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1159,13 +1159,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (simplify
   (copysigns @0 REAL_CST@1)
   (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
-   (abs @0))))
+   (abs @0)
+#if GIMPLE
+   (if (!direct_internal_fn_supported_p (IFN_COPYSIGN, type,
+					 OPTIMIZE_FOR_BOTH))
+    (negate (abs @0)))
+#endif
+   )))
 
+#if GIMPLE
 /* Transform fneg (fabs (X)) -> copysign (X, -1).  */
 (simplify
  (negate (abs @0))
- (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
-
+ (if (direct_internal_fn_supported_p (IFN_COPYSIGN, type,
+				      OPTIMIZE_FOR_BOTH))
+   (IFN_COPYSIGN @0 { build_minus_one_cst (type); })))
+#endif
 /* copysign(copysign(x, y), z) -> copysign(x, z).  */
 (for copysigns (COPYSIGN_ALL)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
index f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6..96b80c733794fffada1b08274ef39cc8f6e442ce 100644
--- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
+++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-cddce1" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 
 double foo (double x)
 {
@@ -12,5 +13,7 @@ double bar (double x)
   return __builtin_copysign (x, minuszero);
 }
 
-/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
index 605f202ed6bc7aa8fe921457b02ff0b88cc63ce6..24068cffa4a8e2807ba7d16c4ed3def4f736e797 100644
--- a/gcc/testsuite/gcc.dg/pr55152-2.c
+++ b/gcc/testsuite/gcc.dg/pr55152-2.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 
 double g (double a)
 {
@@ -10,5 +11,6 @@ int f(int a)
   return (a<-a)?a:-a;
 }
 
-/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
index e1b825f37f69ac3c4666b3a52d733368805ad31d..80fa448df1259c7dba406797f4198205783a2fba 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 /* PR tree-optimization/109829 */
 
 float abs_f(float x) { return __builtin_signbit(x) ? x : -x; }
@@ -9,6 +10,8 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
 
 /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
 /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times "= -" 3 "optimized" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
index c3a138642d6ff7be984e91fa1343cb2718db7ae1..4087ba93018bb71710102eb379460bc760020081 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-backprop-details" }  */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 
 void start (void *);
 void end (void *);
@@ -26,6 +27,8 @@ TEST_FUNCTION (float, f)
 TEST_FUNCTION (double, )
 TEST_FUNCTION (long double, l)
 
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
index e5d565c4b9832c00106588ef411fbd8c292a5cad..e43bc315bef2bd11c11cfd2685f5088e792b7bf7 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
@@ -1,4 +1,5 @@
 /* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 /* { dg-do compile } */
 float f(float x)
 {
@@ -10,5 +11,6 @@ float f1(float x)
   float t = __builtin_copysignf (1.0f, -x);
   return x * t;
 }
-/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
index a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0..675127cfe56b2e9aa9d4c06e2bdce62b59545a08 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
@@ -34,5 +34,5 @@ float i1(float x)
 {
   return x * (x <= 0.f ? 1.f : -1.f);
 }
-/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
-/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
+
+/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 7f13ff0ca565efdf19065811f3301db897329073..64a081cfafd78e5ccb7322d3b382f800b30bbe70 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7830,6 +7830,29 @@ proc check_effective_target_xorsign { } {
 	     || [istarget aarch64*-*-*] || [istarget arm*-*-*] }}]
 }
 
+# Return 1 if the target plus current options supports folding of
+# copysign into IFN_COPYSIGN.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_ifn_copysign { } {
+    return [check_cached_effective_target_indexed ifn_copysign {
+      expr {
+	 (([istarget i?86-*-*] || [istarget x86_64-*-*])
+	   && [is-effective-target sse])
+	 || ([istarget loongarch*-*-*] && [check_effective_target_loongarch_sx])
+	 || ([istarget powerpc*-*-*]
+	     && ![istarget powerpc-*-linux*paired*])
+	 || [istarget alpha*-*-*]
+	 || [istarget aarch64*-*-*]
+	 || [is-effective-target arm_neon]
+	 || ([istarget s390*-*-*]
+	     && [check_effective_target_s390_vx])
+	 || ([istarget riscv*-*-*]
+	     && [check_effective_target_riscv_v])
+	}}]
+}
+
 # Return 1 if the target plus current options supports a vector
 # widening summation of *short* args into *int* result, 0 otherwise.
 #

Comments

Palmer Dabbelt Jan. 4, 2024, 10:32 p.m. UTC | #1
On Thu, 04 Jan 2024 10:20:25 PST (-0800), tamar.christina@arm.com wrote:
> Hi All,
>
> currently GCC does not treat IFN_COPYSIGN the same as the copysign tree expr.
> The latter has a libcall fallback and the IFN can only do optabs.
>
> Because of this the change I made to optimize copysign only works if the
> target has impemented the optab, but it should work for those that have the
> libcall too.
>
> More annoyingly if a target has vector versions of ABS and NEG but not COPYSIGN
> then the change made them lose vectorization.
>
> The proper fix for this is to treat the IFN the same as the tree EXPR and to
> enhance expand_COPYSIGN to also support vector calls.
>
> I have such a patch for GCC 15 but it's quite big and too invasive for stage-4.
> As such this is a minimal fix, just don't apply the transformation and leave
> targets which don't have the optab unoptimized.
>
> Targets list for check_effective_target_ifn_copysign was gotten by grepping for
> copysign and looking at the optab.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> Tests ran in x86_64-pc-linux-gnu -m64/-m32 and tests no longer fail.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	PR tree-optimization/112468
> 	* doc/sourcebuild.texi: Document ifn_copysign.
> 	* match.pd: Only apply transformation if target supports the IFN.
>
> gcc/testsuite/ChangeLog:
>
> 	PR tree-optimization/112468
> 	* gcc.dg/fold-copysign-1.c: Modify tests based on if target supports
> 	IFN_COPYSIGN.
> 	* gcc.dg/pr55152-2.c: Likewise.
> 	* gcc.dg/tree-ssa/abs-4.c: Likewise.
> 	* gcc.dg/tree-ssa/backprop-6.c: Likewise.
> 	* gcc.dg/tree-ssa/copy-sign-2.c: Likewise.
> 	* gcc.dg/tree-ssa/mult-abs-2.c: Likewise.
> 	* lib/target-supports.exp (check_effective_target_ifn_copysign): New.
>
> --- inline copy of patch --
> diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> index 4be67daedb20d394857c02739389cabf23c0d533..f4847dafe65cbbf8c9de34905f614ef6957658b4 100644
> --- a/gcc/doc/sourcebuild.texi
> +++ b/gcc/doc/sourcebuild.texi
> @@ -2664,6 +2664,10 @@ Target requires a command line argument to enable a SIMD instruction set.
>  @item xorsign
>  Target supports the xorsign optab expansion.
>
> +@item ifn_copysign
> +Target supports the IFN_COPYSIGN optab expansion for both scalar and vector
> +types.
> +
>  @end table
>
>  @subsubsection Environment attributes
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d57e29bfe1d68afd4df4dda20fecc2405ff05332..87d13e7e3e1aa6d89119142b614890dc4729b521 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1159,13 +1159,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (simplify
>    (copysigns @0 REAL_CST@1)
>    (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> -   (abs @0))))
> +   (abs @0)
> +#if GIMPLE
> +   (if (!direct_internal_fn_supported_p (IFN_COPYSIGN, type,
> +					 OPTIMIZE_FOR_BOTH))
> +    (negate (abs @0)))
> +#endif
> +   )))
>
> +#if GIMPLE
>  /* Transform fneg (fabs (X)) -> copysign (X, -1).  */
>  (simplify
>   (negate (abs @0))
> - (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
> -
> + (if (direct_internal_fn_supported_p (IFN_COPYSIGN, type,
> +				      OPTIMIZE_FOR_BOTH))
> +   (IFN_COPYSIGN @0 { build_minus_one_cst (type); })))
> +#endif
>  /* copysign(copysign(x, y), z) -> copysign(x, z).  */
>  (for copysigns (COPYSIGN_ALL)
>   (simplify
> diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> index f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6..96b80c733794fffada1b08274ef39cc8f6e442ce 100644
> --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O -fdump-tree-cddce1" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
>
>  double foo (double x)
>  {
> @@ -12,5 +13,7 @@ double bar (double x)
>    return __builtin_copysign (x, minuszero);
>  }
>
> -/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" { target { ! ifn_copysign } } } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
> index 605f202ed6bc7aa8fe921457b02ff0b88cc63ce6..24068cffa4a8e2807ba7d16c4ed3def4f736e797 100644
> --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow -fdump-tree-optimized" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
>
>  double g (double a)
>  {
> @@ -10,5 +11,6 @@ int f(int a)
>    return (a<-a)?a:-a;
>  }
>
> -/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> index e1b825f37f69ac3c4666b3a52d733368805ad31d..80fa448df1259c7dba406797f4198205783a2fba 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O1 -fdump-tree-optimized" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
>  /* PR tree-optimization/109829 */
>
>  float abs_f(float x) { return __builtin_signbit(x) ? x : -x; }
> @@ -9,6 +10,8 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
>
>  /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
>  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized" { target { ! ifn_copysign } } } } */
> +/* { dg-final { scan-tree-dump-times "= -" 3 "optimized" { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> index c3a138642d6ff7be984e91fa1343cb2718db7ae1..4087ba93018bb71710102eb379460bc760020081 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O -fdump-tree-backprop-details" }  */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
>
>  void start (void *);
>  void end (void *);
> @@ -26,6 +27,8 @@ TEST_FUNCTION (float, f)
>  TEST_FUNCTION (double, )
>  TEST_FUNCTION (long double, l)
>
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" { target { ! ifn_copysign } } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> index e5d565c4b9832c00106588ef411fbd8c292a5cad..e43bc315bef2bd11c11cfd2685f5088e792b7bf7 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> @@ -1,4 +1,5 @@
>  /* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
>  /* { dg-do compile } */
>  float f(float x)
>  {
> @@ -10,5 +11,6 @@ float f1(float x)
>    float t = __builtin_copysignf (1.0f, -x);
>    return x * t;
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized" { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> index a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0..675127cfe56b2e9aa9d4c06e2bdce62b59545a08 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> @@ -34,5 +34,5 @@ float i1(float x)
>  {
>    return x * (x <= 0.f ? 1.f : -1.f);
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
> -/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
> +
> +/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple" } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 7f13ff0ca565efdf19065811f3301db897329073..64a081cfafd78e5ccb7322d3b382f800b30bbe70 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -7830,6 +7830,29 @@ proc check_effective_target_xorsign { } {
>  	     || [istarget aarch64*-*-*] || [istarget arm*-*-*] }}]
>  }
>
> +# Return 1 if the target plus current options supports folding of
> +# copysign into IFN_COPYSIGN.
> +#
> +# This won't change for different subtargets so cache the result.
> +
> +proc check_effective_target_ifn_copysign { } {
> +    return [check_cached_effective_target_indexed ifn_copysign {
> +      expr {
> +	 (([istarget i?86-*-*] || [istarget x86_64-*-*])
> +	   && [is-effective-target sse])
> +	 || ([istarget loongarch*-*-*] && [check_effective_target_loongarch_sx])
> +	 || ([istarget powerpc*-*-*]
> +	     && ![istarget powerpc-*-linux*paired*])
> +	 || [istarget alpha*-*-*]
> +	 || [istarget aarch64*-*-*]
> +	 || [is-effective-target arm_neon]
> +	 || ([istarget s390*-*-*]
> +	     && [check_effective_target_s390_vx])
> +	 || ([istarget riscv*-*-*]
> +	     && [check_effective_target_riscv_v])

Unless I'm missing something, we have copysign in the scalar 
floating-point ISAs as well.  So I think this should be

      || ([istarget riscv*-*-*]
          && [check_effective_target_hard_float])

> +	}}]
> +}
> +
>  # Return 1 if the target plus current options supports a vector
>  # widening summation of *short* args into *int* result, 0 otherwise.
>  #
Xi Ruoyao Jan. 4, 2024, 10:38 p.m. UTC | #2
On Thu, 2024-01-04 at 14:32 -0800, Palmer Dabbelt wrote:
> > +proc check_effective_target_ifn_copysign { } {
> > +    return [check_cached_effective_target_indexed ifn_copysign {
> > +      expr {
> > +	 (([istarget i?86-*-*] || [istarget x86_64-*-*])
> > +	   && [is-effective-target sse])
> > +	 || ([istarget loongarch*-*-*] && [check_effective_target_loongarch_sx])

LoongArch has [scalar FP copysign][1] too.

[1]:https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_fscaleblogbcopysign_sd

> > +	 || ([istarget powerpc*-*-*]
> > +	     && ![istarget powerpc-*-linux*paired*])
> > +	 || [istarget alpha*-*-*]
> > +	 || [istarget aarch64*-*-*]
> > +	 || [is-effective-target arm_neon]
> > +	 || ([istarget s390*-*-*]
> > +	     && [check_effective_target_s390_vx])
> > +	 || ([istarget riscv*-*-*]
> > +	     && [check_effective_target_riscv_v])
> 
> Unless I'm missing something, we have copysign in the scalar 
> floating-point ISAs as well.  So I think this should be
> 
>       || ([istarget riscv*-*-*]
>           && [check_effective_target_hard_float])
Tamar Christina Jan. 5, 2024, 11:02 a.m. UTC | #3
> -----Original Message-----
> From: Xi Ruoyao <xry111@xry111.site>
> Sent: Thursday, January 4, 2024 10:39 PM
> To: Palmer Dabbelt <palmer@dabbelt.com>; Tamar Christina
> <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; rguenther@suse.de; Jeff Law
> <jlaw@ventanamicro.com>
> Subject: Re: [PATCH]middle-end: Don't apply copysign optimization if target does
> not implement optab [PR112468]
> 
> On Thu, 2024-01-04 at 14:32 -0800, Palmer Dabbelt wrote:
> > > +proc check_effective_target_ifn_copysign { } {
> > > +    return [check_cached_effective_target_indexed ifn_copysign {
> > > +      expr {
> > > +	 (([istarget i?86-*-*] || [istarget x86_64-*-*])
> > > +	   && [is-effective-target sse])
> > > +	 || ([istarget loongarch*-*-*] && [check_effective_target_loongarch_sx])
> 
> LoongArch has [scalar FP copysign][1] too.
> 
> [1]:https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-
> EN.html#_fscaleblogbcopysign_sd

Ok, so something like:

|| ([istarget loongarch*-*-*] && ([check_effective_target_loongarch_sx] ||  [check_effective_target_hard_float]))
?

> 
> > > +	 || ([istarget powerpc*-*-*]
> > > +	     && ![istarget powerpc-*-linux*paired*])
> > > +	 || [istarget alpha*-*-*]
> > > +	 || [istarget aarch64*-*-*]
> > > +	 || [is-effective-target arm_neon]
> > > +	 || ([istarget s390*-*-*]
> > > +	     && [check_effective_target_s390_vx])
> > > +	 || ([istarget riscv*-*-*]
> > > +	     && [check_effective_target_riscv_v])
> >
> > Unless I'm missing something, we have copysign in the scalar
> > floating-point ISAs as well.  So I think this should be
> >
> >       || ([istarget riscv*-*-*]
> >           && [check_effective_target_hard_float])
> 

Ah cool, will update it in next version. 

Thanks,
Tamar

> --
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
Xi Ruoyao Jan. 5, 2024, 11:47 a.m. UTC | #4
On Fri, 2024-01-05 at 11:02 +0000, Tamar Christina wrote:
> Ok, so something like:
> 
> || ([istarget loongarch*-*-*] &&
> ([check_effective_target_loongarch_sx] ||
> [check_effective_target_hard_float]))
> ?

We don't need "[check_effective_target_loongarch_sx] ||" because SIMD
requires hard float.
Tamar Christina Jan. 5, 2024, 1:30 p.m. UTC | #5
> On Fri, 2024-01-05 at 11:02 +0000, Tamar Christina wrote:
> > Ok, so something like:
> >
> > || ([istarget loongarch*-*-*] &&
> > ([check_effective_target_loongarch_sx] ||
> > [check_effective_target_hard_float]))
> > ?
> 
> We don't need "[check_effective_target_loongarch_sx] ||" because SIMD
> requires hard float.
> 

Cool, thanks! 

--

Hi All,

Currently GCC does not treat IFN_COPYSIGN the same as the copysign tree expr.
The latter has a libcall fallback, whereas the IFN can only be expanded through
an optab.

Because of this, the change I made to optimize copysign only works if the
target has implemented the optab, but it should work for those that have the
libcall too.

More annoyingly if a target has vector versions of ABS and NEG but not COPYSIGN
then the change made them lose vectorization.

The proper fix for this is to treat the IFN the same as the tree EXPR and to
enhance expand_COPYSIGN to also support vector calls.

I have such a patch for GCC 15 but it's quite big and too invasive for stage-4.
As such this is a minimal fix: just don't apply the transformation, leaving
targets which don't have the optab unoptimized.

The target list for check_effective_target_ifn_copysign was obtained by
grepping for copysign and looking at the optab.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Tests ran in x86_64-pc-linux-gnu -m32 and tests no longer fail.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/112468
	* doc/sourcebuild.texi: Document ifn_copysign.
	* match.pd: Only apply transformation if target supports the IFN.

gcc/testsuite/ChangeLog:

	PR tree-optimization/112468
	* gcc.dg/fold-copysign-1.c: Modify tests based on if target supports
	IFN_COPYSIGN.
	* gcc.dg/pr55152-2.c: Likewise.
	* gcc.dg/tree-ssa/abs-4.c: Likewise.
	* gcc.dg/tree-ssa/backprop-6.c: Likewise.
	* gcc.dg/tree-ssa/copy-sign-2.c: Likewise.
	* gcc.dg/tree-ssa/mult-abs-2.c: Likewise.
	* lib/target-supports.exp (check_effective_target_ifn_copysign): New.

--- inline copy of patch ---

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 4be67daedb20d394857c02739389cabf23c0d533..f4847dafe65cbbf8c9de34905f614ef6957658b4 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2664,6 +2664,10 @@ Target requires a command line argument to enable a SIMD instruction set.
 @item xorsign
 Target supports the xorsign optab expansion.
 
+@item ifn_copysign
+Target supports the IFN_COPYSIGN optab expansion for both scalar and vector
+types.
+
 @end table
 
 @subsubsection Environment attributes
diff --git a/gcc/match.pd b/gcc/match.pd
index d57e29bfe1d68afd4df4dda20fecc2405ff05332..87d13e7e3e1aa6d89119142b614890dc4729b521 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1159,13 +1159,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (simplify
   (copysigns @0 REAL_CST@1)
   (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
-   (abs @0))))
+   (abs @0)
+#if GIMPLE
+   (if (!direct_internal_fn_supported_p (IFN_COPYSIGN, type,
+					 OPTIMIZE_FOR_BOTH))
+    (negate (abs @0)))
+#endif
+   )))
 
+#if GIMPLE
 /* Transform fneg (fabs (X)) -> copysign (X, -1).  */
 (simplify
  (negate (abs @0))
- (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
-
+ (if (direct_internal_fn_supported_p (IFN_COPYSIGN, type,
+				      OPTIMIZE_FOR_BOTH))
+   (IFN_COPYSIGN @0 { build_minus_one_cst (type); })))
+#endif
 /* copysign(copysign(x, y), z) -> copysign(x, z).  */
 (for copysigns (COPYSIGN_ALL)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
index f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6..96b80c733794fffada1b08274ef39cc8f6e442ce 100644
--- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
+++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-cddce1" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 
 double foo (double x)
 {
@@ -12,5 +13,7 @@ double bar (double x)
   return __builtin_copysign (x, minuszero);
 }
 
-/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
index 605f202ed6bc7aa8fe921457b02ff0b88cc63ce6..24068cffa4a8e2807ba7d16c4ed3def4f736e797 100644
--- a/gcc/testsuite/gcc.dg/pr55152-2.c
+++ b/gcc/testsuite/gcc.dg/pr55152-2.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 
 double g (double a)
 {
@@ -10,5 +11,6 @@ int f(int a)
   return (a<-a)?a:-a;
 }
 
-/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
index e1b825f37f69ac3c4666b3a52d733368805ad31d..80fa448df1259c7dba406797f4198205783a2fba 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 /* PR tree-optimization/109829 */
 
 float abs_f(float x) { return __builtin_signbit(x) ? x : -x; }
@@ -9,6 +10,8 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
 
 /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
 /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times "= -" 3 "optimized" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
index c3a138642d6ff7be984e91fa1343cb2718db7ae1..4087ba93018bb71710102eb379460bc760020081 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-backprop-details" }  */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 
 void start (void *);
 void end (void *);
@@ -26,6 +27,8 @@ TEST_FUNCTION (float, f)
 TEST_FUNCTION (double, )
 TEST_FUNCTION (long double, l)
 
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
index e5d565c4b9832c00106588ef411fbd8c292a5cad..e43bc315bef2bd11c11cfd2685f5088e792b7bf7 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
@@ -1,4 +1,5 @@
 /* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 /* { dg-do compile } */
 float f(float x)
 {
@@ -10,5 +11,6 @@ float f1(float x)
   float t = __builtin_copysignf (1.0f, -x);
   return x * t;
 }
-/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
index a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0..675127cfe56b2e9aa9d4c06e2bdce62b59545a08 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
@@ -34,5 +34,5 @@ float i1(float x)
 {
   return x * (x <= 0.f ? 1.f : -1.f);
 }
-/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
-/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
+
+/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 7f13ff0ca565efdf19065811f3301db897329073..f0765a14fb78f2267f54f5ae79a86f4ab644152b 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7830,6 +7830,30 @@ proc check_effective_target_xorsign { } {
 	     || [istarget aarch64*-*-*] || [istarget arm*-*-*] }}]
 }
 
+# Return 1 if the target plus current options supports folding of
+# copysign into IFN_COPYSIGN.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_ifn_copysign { } {
+    return [check_cached_effective_target_indexed ifn_copysign {
+      expr {
+	 (([istarget i?86-*-*] || [istarget x86_64-*-*])
+	   && [is-effective-target sse])
+	 || ([istarget loongarch*-*-*]
+	     && [check_effective_target_hard_float])
+	 || ([istarget powerpc*-*-*]
+	     && ![istarget powerpc-*-linux*paired*])
+	 || [istarget alpha*-*-*]
+	 || [istarget aarch64*-*-*]
+	 || [is-effective-target arm_neon]
+	 || ([istarget s390*-*-*]
+	     && [check_effective_target_s390_vx])
+	 || ([istarget riscv*-*-*]
+	     && [check_effective_target_hard_float])
+	}}]
+}
+
 # Return 1 if the target plus current options supports a vector
 # widening summation of *short* args into *int* result, 0 otherwise.
 #
Tamar Christina Jan. 10, 2024, 9:25 a.m. UTC | #6
ping

> -----Original Message-----
> From: Tamar Christina <Tamar.Christina@arm.com>
> Sent: Friday, January 5, 2024 1:31 PM
> To: Xi Ruoyao <xry111@xry111.site>; Palmer Dabbelt <palmer@dabbelt.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; rguenther@suse.de; Jeff Law
> <jlaw@ventanamicro.com>
> Subject: RE: [PATCH]middle-end: Don't apply copysign optimization if target does
> not implement optab [PR112468]
> 
> > On Fri, 2024-01-05 at 11:02 +0000, Tamar Christina wrote:
> > > Ok, so something like:
> > >
> > > > > ([istarget loongarch*-*-*] &&
> > > > > ([check_effective_target_loongarch_sx] ||
> > > > > [check_effective_target_hard_float]))
> > > ?
> >
> > We don't need "[check_effective_target_loongarch_sx] ||" because SIMD
> > requires hard float.
> >
> 
> Cool, thanks!
> 
> --
> 
> Hi All,
> 
> currently GCC does not treat IFN_COPYSIGN the same as the copysign tree expr.
> The latter has a libcall fallback and the IFN can only do optabs.
> 
> Because of this the change I made to optimize copysign only works if the
> target has implemented the optab, but it should work for those that have the
> libcall too.
> 
> More annoyingly if a target has vector versions of ABS and NEG but not COPYSIGN
> then the change made them lose vectorization.
> 
> The proper fix for this is to treat the IFN the same as the tree EXPR and to
> enhance expand_COPYSIGN to also support vector calls.
> 
> I have such a patch for GCC 15 but it's quite big and too invasive for stage-4.
> As such this is a minimal fix, just don't apply the transformation and leave
> targets which don't have the optab unoptimized.
> 
> Targets list for check_effective_target_ifn_copysign was gotten by grepping for
> copysign and looking at the optab.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> Tests ran in x86_64-pc-linux-gnu -m32 and tests no longer fail.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/112468
> 	* doc/sourcebuild.texi: Document ifn_copysign.
> 	* match.pd: Only apply transformation if target supports the IFN.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/112468
> 	* gcc.dg/fold-copysign-1.c: Modify tests based on if target supports
> 	IFN_COPYSIGN.
> 	* gcc.dg/pr55152-2.c: Likewise.
> 	* gcc.dg/tree-ssa/abs-4.c: Likewise.
> 	* gcc.dg/tree-ssa/backprop-6.c: Likewise.
> 	* gcc.dg/tree-ssa/copy-sign-2.c: Likewise.
> 	* gcc.dg/tree-ssa/mult-abs-2.c: Likewise.
> 	* lib/target-supports.exp (check_effective_target_ifn_copysign): New.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> index
> 4be67daedb20d394857c02739389cabf23c0d533..f4847dafe65cbbf8c9de3490
> 5f614ef6957658b4 100644
> --- a/gcc/doc/sourcebuild.texi
> +++ b/gcc/doc/sourcebuild.texi
> @@ -2664,6 +2664,10 @@ Target requires a command line argument to enable a
> SIMD instruction set.
>  @item xorsign
>  Target supports the xorsign optab expansion.
> 
> +@item ifn_copysign
> +Target supports the IFN_COPYSIGN optab expansion for both scalar and vector
> +types.
> +
>  @end table
> 
>  @subsubsection Environment attributes
> diff --git a/gcc/match.pd b/gcc/match.pd
> index
> d57e29bfe1d68afd4df4dda20fecc2405ff05332..87d13e7e3e1aa6d89119142b6
> 14890dc4729b521 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1159,13 +1159,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (simplify
>    (copysigns @0 REAL_CST@1)
>    (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> -   (abs @0))))
> +   (abs @0)
> +#if GIMPLE
> +   (if (!direct_internal_fn_supported_p (IFN_COPYSIGN, type,
> +					 OPTIMIZE_FOR_BOTH))
> +    (negate (abs @0)))
> +#endif
> +   )))
> 
> +#if GIMPLE
>  /* Transform fneg (fabs (X)) -> copysign (X, -1).  */
>  (simplify
>   (negate (abs @0))
> - (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
> -
> + (if (direct_internal_fn_supported_p (IFN_COPYSIGN, type,
> +				      OPTIMIZE_FOR_BOTH))
> +   (IFN_COPYSIGN @0 { build_minus_one_cst (type); })))
> +#endif
>  /* copysign(copysign(x, y), z) -> copysign(x, z).  */
>  (for copysigns (COPYSIGN_ALL)
>   (simplify
> diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-
> copysign-1.c
> index
> f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6..96b80c733794fffada1b08274ef
> 39cc8f6e442ce 100644
> --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O -fdump-tree-cddce1" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-*
> } && ilp32 } } } */
> 
>  double foo (double x)
>  {
> @@ -12,5 +13,7 @@ double bar (double x)
>    return __builtin_copysign (x, minuszero);
>  }
> 
> -/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" { target
> ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" { target
> ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" { target { ! ifn_copysign } } } }
> */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" { target { !
> ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
> index
> 605f202ed6bc7aa8fe921457b02ff0b88cc63ce6..24068cffa4a8e2807ba7d16c4e
> d3def4f736e797 100644
> --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow -fdump-
> tree-optimized" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-*
> } && ilp32 } } } */
> 
>  double g (double a)
>  {
> @@ -10,5 +11,6 @@ int f(int a)
>    return (a<-a)?a:-a;
>  }
> 
> -/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" { target
> ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" { target
> ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" { target { !
> ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-
> ssa/abs-4.c
> index
> e1b825f37f69ac3c4666b3a52d733368805ad31d..80fa448df1259c7dba406797
> f4198205783a2fba 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O1 -fdump-tree-optimized" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-*
> } && ilp32 } } } */
>  /* PR tree-optimization/109829 */
> 
>  float abs_f(float x) { return __builtin_signbit(x) ? x : -x; }
> @@ -9,6 +10,8 @@ long double abs_ld(long double x) { return __builtin_signbit(x)
> ? x : -x; }
> 
>  /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
>  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target
> ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target ifn_copysign } } }
> */
> +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target
> ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized" { target { !
> ifn_copysign } } } } */
> +/* { dg-final { scan-tree-dump-times "= -" 3 "optimized" { target { ! ifn_copysign }
> } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> index
> c3a138642d6ff7be984e91fa1343cb2718db7ae1..4087ba93018bb71710102eb
> 379460bc760020081 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O -fdump-tree-backprop-details" }  */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-*
> } && ilp32 } } } */
> 
>  void start (void *);
>  void end (void *);
> @@ -26,6 +27,8 @@ TEST_FUNCTION (float, f)
>  TEST_FUNCTION (double, )
>  TEST_FUNCTION (long double, l)
> 
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop"
> } } */
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop"
> } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" { target
> ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop"
> { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop"
> { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" { target { !
> ifn_copysign } } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop"
> { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> index
> e5d565c4b9832c00106588ef411fbd8c292a5cad..e43bc315bef2bd11c11cfd268
> 5f5088e792b7bf7 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> @@ -1,4 +1,5 @@
>  /* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-*
> } && ilp32 } } } */
>  /* { dg-do compile } */
>  float f(float x)
>  {
> @@ -10,5 +11,6 @@ float f1(float x)
>    float t = __builtin_copysignf (1.0f, -x);
>    return x * t;
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized" { target ifn_copysign } }
> } */
> +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized" { target
> ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized" { target { ! ifn_copysign
> } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> index
> a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0..675127cfe56b2e9aa9d4c06
> e2bdce62b59545a08 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> @@ -34,5 +34,5 @@ float i1(float x)
>  {
>    return x * (x <= 0.f ? 1.f : -1.f);
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
> -/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
> +
> +/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple" } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-
> supports.exp
> index
> 7f13ff0ca565efdf19065811f3301db897329073..f0765a14fb78f2267f54f5ae79
> a86f4ab644152b 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -7830,6 +7830,30 @@ proc check_effective_target_xorsign { } {
>  	     || [istarget aarch64*-*-*] || [istarget arm*-*-*] }}]
>  }
> 
> +# Return 1 if the target plus current options supports folding of
> +# copysign into IFN_COPYSIGN.
> +#
> +# This won't change for different subtargets so cache the result.
> +
> +proc check_effective_target_ifn_copysign { } {
> +    return [check_cached_effective_target_indexed ifn_copysign {
> +      expr {
> +	 (([istarget i?86-*-*] || [istarget x86_64-*-*])
> +	   && [is-effective-target sse])
> +	 || ([istarget loongarch*-*-*]
> +	     && [check_effective_target_hard_float])
> +	 || ([istarget powerpc*-*-*]
> +	     && ![istarget powerpc-*-linux*paired*])
> +	 || [istarget alpha*-*-*]
> +	 || [istarget aarch64*-*-*]
> +	 || [is-effective-target arm_neon]
> +	 || ([istarget s390*-*-*]
> +	     && [check_effective_target_s390_vx])
> +	 || ([istarget riscv*-*-*]
> +	     && [check_effective_target_hard_float])
> +	}}]
> +}
> +
>  # Return 1 if the target plus current options supports a vector
>  # widening summation of *short* args into *int* result, 0 otherwise.
>  #
Richard Biener Jan. 10, 2024, 12:29 p.m. UTC | #7
On Fri, 5 Jan 2024, Tamar Christina wrote:

> > On Fri, 2024-01-05 at 11:02 +0000, Tamar Christina wrote:
> > > Ok, so something like:
> > >
> > > > > ([istarget loongarch*-*-*] &&
> > > > > ([check_effective_target_loongarch_sx] ||
> > > > > [check_effective_target_hard_float]))
> > > ?
> > 
> > We don't need "[check_effective_target_loongarch_sx] ||" because SIMD
> > requires hard float.
> > 
> 
> Cool, thanks! 
> 
> --
> 
> Hi All,
> 
> currently GCC does not treat IFN_COPYSIGN the same as the copysign tree expr.
> The latter has a libcall fallback and the IFN can only do optabs.
> 
> Because of this the change I made to optimize copysign only works if the
> target has implemented the optab, but it should work for those that have the
> libcall too.
> 
> More annoyingly if a target has vector versions of ABS and NEG but not COPYSIGN
> then the change made them lose vectorization.
> 
> The proper fix for this is to treat the IFN the same as the tree EXPR and to
> enhance expand_COPYSIGN to also support vector calls.

I don't think that will work - you'd still need to check for the
availability of the function, otherwise you'll end up with link
errors.  I think you instead want to verify that fallback expansion
with expand_copysign_absneg or expand_copysign_bit will work, thus
we'll never emit a libcall.  In fact I think we might want to require
that all targets either implement a copysign optab or allow such
fallback expansion given it's such a core functionality.

> I have such a patch for GCC 15 but it's quite big and too invasive for stage-4.
> As such this is a minimal fix, just don't apply the transformation and leave
> targets which don't have the optab unoptimized.
> 
> Targets list for check_effective_target_ifn_copysign was gotten by grepping for
> copysign and looking at the optab.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> Tests ran in x86_64-pc-linux-gnu -m32 and tests no longer fail.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/112468
> 	* doc/sourcebuild.texi: Document ifn_copysign.
> 	* match.pd: Only apply transformation if target supports the IFN.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/112468
> 	* gcc.dg/fold-copysign-1.c: Modify tests based on if target supports
> 	IFN_COPYSIGN.
> 	* gcc.dg/pr55152-2.c: Likewise.
> 	* gcc.dg/tree-ssa/abs-4.c: Likewise.
> 	* gcc.dg/tree-ssa/backprop-6.c: Likewise.
> 	* gcc.dg/tree-ssa/copy-sign-2.c: Likewise.
> 	* gcc.dg/tree-ssa/mult-abs-2.c: Likewise.
> 	* lib/target-supports.exp (check_effective_target_ifn_copysign): New.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
> index 4be67daedb20d394857c02739389cabf23c0d533..f4847dafe65cbbf8c9de34905f614ef6957658b4 100644
> --- a/gcc/doc/sourcebuild.texi
> +++ b/gcc/doc/sourcebuild.texi
> @@ -2664,6 +2664,10 @@ Target requires a command line argument to enable a SIMD instruction set.
>  @item xorsign
>  Target supports the xorsign optab expansion.
>  
> +@item ifn_copysign
> +Target supports the IFN_COPYSIGN optab expansion for both scalar and vector
> +types.

Target supports the copysign optab expansion for both scalar and vector
modes.

Note this leaves the actual modes required unspecified - can we
restrict this to float and double?

> +
>  @end table
>  
>  @subsubsection Environment attributes
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d57e29bfe1d68afd4df4dda20fecc2405ff05332..87d13e7e3e1aa6d89119142b614890dc4729b521 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1159,13 +1159,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>   (simplify
>    (copysigns @0 REAL_CST@1)
>    (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
> -   (abs @0))))
> +   (abs @0)
> +#if GIMPLE
> +   (if (!direct_internal_fn_supported_p (IFN_COPYSIGN, type,
> +					 OPTIMIZE_FOR_BOTH))
> +    (negate (abs @0)))
> +#endif
> +   )))
>  
> +#if GIMPLE
>  /* Transform fneg (fabs (X)) -> copysign (X, -1).  */
>  (simplify
>   (negate (abs @0))
> - (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
> -
> + (if (direct_internal_fn_supported_p (IFN_COPYSIGN, type,
> +				      OPTIMIZE_FOR_BOTH))
> +   (IFN_COPYSIGN @0 { build_minus_one_cst (type); })))
> +#endif

I think we want to update the comments as well to note that
copysign (x, -1) is canonical for -abs(x) when copysign
is implemented as optab.

OK with these changes.

Richard.

>  /* copysign(copysign(x, y), z) -> copysign(x, z).  */
>  (for copysigns (COPYSIGN_ALL)
>   (simplify
> diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> index f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6..96b80c733794fffada1b08274ef39cc8f6e442ce 100644
> --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
> +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O -fdump-tree-cddce1" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
>  
>  double foo (double x)
>  {
> @@ -12,5 +13,7 @@ double bar (double x)
>    return __builtin_copysign (x, minuszero);
>  }
>  
> -/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
> +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" { target { ! ifn_copysign } } } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
> index 605f202ed6bc7aa8fe921457b02ff0b88cc63ce6..24068cffa4a8e2807ba7d16c4ed3def4f736e797 100644
> --- a/gcc/testsuite/gcc.dg/pr55152-2.c
> +++ b/gcc/testsuite/gcc.dg/pr55152-2.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow -fdump-tree-optimized" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
>  
>  double g (double a)
>  {
> @@ -10,5 +11,6 @@ int f(int a)
>    return (a<-a)?a:-a;
>  }
>  
> -/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
> -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> index e1b825f37f69ac3c4666b3a52d733368805ad31d..80fa448df1259c7dba406797f4198205783a2fba 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O1 -fdump-tree-optimized" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
>  /* PR tree-optimization/109829 */
>  
>  float abs_f(float x) { return __builtin_signbit(x) ? x : -x; }
> @@ -9,6 +10,8 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
>  
>  /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
>  /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized" { target { ! ifn_copysign } } } } */
> +/* { dg-final { scan-tree-dump-times "= -" 3 "optimized" { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> index c3a138642d6ff7be984e91fa1343cb2718db7ae1..4087ba93018bb71710102eb379460bc760020081 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
> @@ -1,5 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O -fdump-tree-backprop-details" }  */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
>  
>  void start (void *);
>  void end (void *);
> @@ -26,6 +27,8 @@ TEST_FUNCTION (float, f)
>  TEST_FUNCTION (double, )
>  TEST_FUNCTION (long double, l)
>  
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
> -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" { target { ! ifn_copysign } } } } */
> +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> index e5d565c4b9832c00106588ef411fbd8c292a5cad..e43bc315bef2bd11c11cfd2685f5088e792b7bf7 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
> @@ -1,4 +1,5 @@
>  /* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
> +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
>  /* { dg-do compile } */
>  float f(float x)
>  {
> @@ -10,5 +11,6 @@ float f1(float x)
>    float t = __builtin_copysignf (1.0f, -x);
>    return x * t;
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
> -/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
> +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
> +/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized" { target { ! ifn_copysign } } } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> index a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0..675127cfe56b2e9aa9d4c06e2bdce62b59545a08 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
> @@ -34,5 +34,5 @@ float i1(float x)
>  {
>    return x * (x <= 0.f ? 1.f : -1.f);
>  }
> -/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
> -/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
> +
> +/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple" } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 7f13ff0ca565efdf19065811f3301db897329073..f0765a14fb78f2267f54f5ae79a86f4ab644152b 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -7830,6 +7830,30 @@ proc check_effective_target_xorsign { } {
>  	     || [istarget aarch64*-*-*] || [istarget arm*-*-*] }}]
>  }
>  
> +# Return 1 if the target plus current options supports folding of
> +# copysign into IFN_COPYSIGN.
> +#
> +# This won't change for different subtargets so cache the result.
> +
> +proc check_effective_target_ifn_copysign { } {
> +    return [check_cached_effective_target_indexed ifn_copysign {
> +      expr {
> +	 (([istarget i?86-*-*] || [istarget x86_64-*-*])
> +	   && [is-effective-target sse])
> +	 || ([istarget loongarch*-*-*]
> +	     && [check_effective_target_hard_float])
> +	 || ([istarget powerpc*-*-*]
> +	     && ![istarget powerpc-*-linux*paired*])
> +	 || [istarget alpha*-*-*]
> +	 || [istarget aarch64*-*-*]
> +	 || [is-effective-target arm_neon]
> +	 || ([istarget s390*-*-*]
> +	     && [check_effective_target_s390_vx])
> +	 || ([istarget riscv*-*-*]
> +	     && [check_effective_target_hard_float])
> +	}}]
> +}
> +
>  # Return 1 if the target plus current options supports a vector
>  # widening summation of *short* args into *int* result, 0 otherwise.
>  #
>
diff mbox series

Patch

--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2664,6 +2664,10 @@  Target requires a command line argument to enable a SIMD instruction set.
 @item xorsign
 Target supports the xorsign optab expansion.
 
+@item ifn_copysign
+Target supports the IFN_COPYSIGN optab expansion for both scalar and vector
+types.
+
 @end table
 
 @subsubsection Environment attributes
diff --git a/gcc/match.pd b/gcc/match.pd
index d57e29bfe1d68afd4df4dda20fecc2405ff05332..87d13e7e3e1aa6d89119142b614890dc4729b521 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1159,13 +1159,22 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (simplify
   (copysigns @0 REAL_CST@1)
   (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1)))
-   (abs @0))))
+   (abs @0)
+#if GIMPLE
+   (if (!direct_internal_fn_supported_p (IFN_COPYSIGN, type,
+					 OPTIMIZE_FOR_BOTH))
+    (negate (abs @0)))
+#endif
+   )))
 
+#if GIMPLE
 /* Transform fneg (fabs (X)) -> copysign (X, -1).  */
 (simplify
  (negate (abs @0))
- (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))
-
+ (if (direct_internal_fn_supported_p (IFN_COPYSIGN, type,
+				      OPTIMIZE_FOR_BOTH))
+   (IFN_COPYSIGN @0 { build_minus_one_cst (type); })))
+#endif
 /* copysign(copysign(x, y), z) -> copysign(x, z).  */
 (for copysigns (COPYSIGN_ALL)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c
index f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6..96b80c733794fffada1b08274ef39cc8f6e442ce 100644
--- a/gcc/testsuite/gcc.dg/fold-copysign-1.c
+++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c
@@ -1,5 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-cddce1" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 
 double foo (double x)
 {
@@ -12,5 +13,7 @@  double bar (double x)
   return __builtin_copysign (x, minuszero);
 }
 
-/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c
index 605f202ed6bc7aa8fe921457b02ff0b88cc63ce6..24068cffa4a8e2807ba7d16c4ed3def4f736e797 100644
--- a/gcc/testsuite/gcc.dg/pr55152-2.c
+++ b/gcc/testsuite/gcc.dg/pr55152-2.c
@@ -1,5 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 
 double g (double a)
 {
@@ -10,5 +11,6 @@  int f(int a)
   return (a<-a)?a:-a;
 }
 
-/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
index e1b825f37f69ac3c4666b3a52d733368805ad31d..80fa448df1259c7dba406797f4198205783a2fba 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c
@@ -1,5 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 /* PR tree-optimization/109829 */
 
 float abs_f(float x) { return __builtin_signbit(x) ? x : -x; }
@@ -9,6 +10,8 @@  long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; }
 
 /* __builtin_signbit(x) ? x : -x. Should be convert into - ABS_EXP<x> */
 /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times "= -" 3 "optimized" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
index c3a138642d6ff7be984e91fa1343cb2718db7ae1..4087ba93018bb71710102eb379460bc760020081 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c
@@ -1,5 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-O -fdump-tree-backprop-details" }  */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 
 void start (void *);
 void end (void *);
@@ -26,6 +27,8 @@  TEST_FUNCTION (float, f)
 TEST_FUNCTION (double, )
 TEST_FUNCTION (long double, l)
 
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */
-/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" { target { ! ifn_copysign } } } } */
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
index e5d565c4b9832c00106588ef411fbd8c292a5cad..e43bc315bef2bd11c11cfd2685f5088e792b7bf7 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c
@@ -1,4 +1,5 @@ 
 /* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
+/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
 /* { dg-do compile } */
 float f(float x)
 {
@@ -10,5 +11,6 @@  float f1(float x)
   float t = __builtin_copysignf (1.0f, -x);
   return x * t;
 }
-/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized" { target ifn_copysign } } } */
+/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized" { target { ! ifn_copysign } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
index a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0..675127cfe56b2e9aa9d4c06e2bdce62b59545a08 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c
@@ -34,5 +34,5 @@  float i1(float x)
 {
   return x * (x <= 0.f ? 1.f : -1.f);
 }
-/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */
-/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */
+
+/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 7f13ff0ca565efdf19065811f3301db897329073..64a081cfafd78e5ccb7322d3b382f800b30bbe70 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7830,6 +7830,29 @@  proc check_effective_target_xorsign { } {
 	     || [istarget aarch64*-*-*] || [istarget arm*-*-*] }}]
 }
 
+# Return 1 if the target plus current options supports folding of copysign
+# into IFN_COPYSIGN (scalar and vector optab expansion), 0 otherwise.
+#
+# This won't change for different subtargets, so cache the result.
+
+proc check_effective_target_ifn_copysign { } {
+    return [check_cached_effective_target_indexed ifn_copysign {
+      expr {
+	 (([istarget i?86-*-*] || [istarget x86_64-*-*])
+	   && [is-effective-target sse])
+	 || ([istarget loongarch*-*-*] && [check_effective_target_loongarch_sx])
+	 || ([istarget powerpc*-*-*]
+	     && ![istarget powerpc-*-linux*paired*])
+	 || [istarget alpha*-*-*]
+	 || [istarget aarch64*-*-*]
+	 || [is-effective-target arm_neon]
+	 || ([istarget s390*-*-*]
+	     && [check_effective_target_s390_vx])
+	 || ([istarget riscv*-*-*]
+	     && [check_effective_target_riscv_v])
+	}}]
+}
+
 # Return 1 if the target plus current options supports a vector
 # widening summation of *short* args into *int* result, 0 otherwise.
 #