diff mbox series

[1/2,x86] Add pre_reload splitter to detect fp min/max pattern.

Message ID 20230706011816.3543708-1-hongtao.liu@intel.com
State New
Headers show
Series [1/2,x86] Add pre_reload splitter to detect fp min/max pattern. | expand

Commit Message

Liu, Hongtao July 6, 2023, 1:18 a.m. UTC
We have ix86_expand_sse_fp_minmax to detect min/max sematics, but
it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
the testcase in the PR, there's an extra move from cmp_op0 to if_true,
and it failed ix86_expand_sse_fp_minmax.

This patch adds pre_reload splitter to detect the min/max pattern.

Operands order in MINSS matters for signed zero and NANs, since the
instruction always returns second operand when any operand is NAN or
both operands are zero.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

	PR target/110170
	* config/i386/i386.md (*ieee_minmax<mode>3_1): New pre_reload
	splitter to detect fp min/max pattern.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr110170.C: New test.
	* gcc.target/i386/pr110170.c: New test.
---
 gcc/config/i386/i386.md                  | 30 +++++++++
 gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr110170.c | 18 ++++++
 3 files changed, 126 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c

Comments

Uros Bizjak July 6, 2023, 6:19 a.m. UTC | #1
On Thu, Jul 6, 2023 at 3:20 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> We have ix86_expand_sse_fp_minmax to detect min/max sematics, but
> it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
> the testcase in the PR, there's an extra move from cmp_op0 to if_true,
> and it failed ix86_expand_sse_fp_minmax.
>
> This patch adds pre_reload splitter to detect the min/max pattern.
>
> Operands order in MINSS matters for signed zero and NANs, since the
> instruction always returns second operand when any operand is NAN or
> both operands are zero.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/110170
>         * config/i386/i386.md (*ieee_minmax<mode>3_1): New pre_reload
>         splitter to detect fp min/max pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.target/i386/pr110170.C: New test.
>         * gcc.target/i386/pr110170.c: New test.
> ---
>  gcc/config/i386/i386.md                  | 30 +++++++++
>  gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr110170.c | 18 ++++++
>  3 files changed, 126 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index e6ebc461e52..353bb21993d 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -22483,6 +22483,36 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
>     (set_attr "type" "sseadd")
>     (set_attr "mode" "<MODE>")])
>
> +;; Operands order in min/max instruction matters for signed zero and NANs.
> +(define_insn_and_split "*ieee_minmax<mode>3_1"
> +  [(set (match_operand:MODEF 0 "register_operand")
> +       (unspec:MODEF
> +         [(match_operand:MODEF 1 "register_operand")
> +          (match_operand:MODEF 2 "register_operand")
> +          (lt:MODEF
> +            (match_operand:MODEF 3 "register_operand")
> +            (match_operand:MODEF 4 "register_operand"))]
> +         UNSPEC_BLENDV))]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> +  && ((rtx_equal_p (operands[1], operands[3])
> +       && rtx_equal_p (operands[2], operands[4]))
> +      || (rtx_equal_p (operands[1], operands[4])
> +         && rtx_equal_p (operands[2], operands[3])))
> +  && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(const_int 0)]
> +{
> +  int u = (rtx_equal_p (operands[1], operands[3])
> +          && rtx_equal_p (operands[2], operands[4]))
> +          ? UNSPEC_IEEE_MAX : UNSPEC_IEEE_MIN;
> +  emit_move_insn (operands[0],
> +                 gen_rtx_UNSPEC (<MODE>mode,
> +                                 gen_rtvec (2, operands[2], operands[1]),
> +                                 u));
> +  DONE;
> +})

Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
and the other emitting UNSPEC_IEEE_MIN.

> +
>  ;; Make two stack loads independent:
>  ;;   fld aa              fld aa
>  ;;   fld %st(0)     ->   fld bb
> diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
> new file mode 100644
> index 00000000000..1e9a781ca74
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr110170.C
> @@ -0,0 +1,78 @@
> +/* { dg-do run } */
> +/* { dg-options " -O2 -march=x86-64 -mfpmath=sse -std=gnu++20" } */

The test involves blendv instruction, which is SSE4.1, so it is
pointless to test it without -msse4.1. Please add -msse4.1 instead of
-march=x86_64 and use sse4_runtime target selector, as is the case
with gcc.target/i386/pr90358.c.

> +#include <math.h>
> +
> +void
> +__attribute__((noinline))
> +__cond_swap(double* __x, double* __y) {
> +  bool __r = (*__x < *__y);
> +  auto __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> +
> +auto test1() {
> +    double nan = -0.0;
> +    double x = 0.0;
> +    __cond_swap(&nan, &x);
> +    return x == -0.0 && nan == 0.0;
> +}
> +
> +auto test1r() {
> +    double nan = NAN;
> +    double x = 1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 0 && nan == 1.0;
> +}
> +
> +auto test2() {
> +    double nan = NAN;
> +    double x = -1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> +}
> +
> +auto test2r() {
> +    double nan = NAN;
> +    double x = -1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> +}
> +
> +auto test3() {
> +    double nan = -NAN;
> +    double x = 1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> +}
> +
> +auto test3r() {
> +    double nan = -NAN;
> +    double x = 1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> +}
> +
> +auto test4() {
> +    double nan = -NAN;
> +    double x = -1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> +}
> +
> +auto test4r() {
> +    double nan = -NAN;
> +    double x = -1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> +}
> +
> +
> +int main() {
> +    if (
> +        !test1() || !test1r()
> +        || !test2() || !test2r()
> +        || !test3() || !test4r()
> +        || !test4() || !test4r()
> +    ) __builtin_abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
> new file mode 100644
> index 00000000000..0f98545cce3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110170.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options " -O2 -march=x86-64-v2 -mfpmath=sse" } */

Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
the test is valid also for 32bit targets, you should use -msseregparm
additional options for ia32 (please see gcc.target/i386/pr43546.c
testcase) in the same way as -mregparm to pass SSE arguments in
registers.

Uros.

> +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 } } */
> +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 } } */
> +
> +void __cond_swap_df(double* __x, double* __y) {
> +  _Bool __r = (*__x < *__y);
> +  double __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> +
> +void __cond_swap_sf(float* __x, float* __y) {
> +  _Bool __r = (*__x < *__y);
> +  float __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e6ebc461e52..353bb21993d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -22483,6 +22483,36 @@  (define_insn "*ieee_s<ieee_maxmin><mode>3"
    (set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+;; Operands order in min/max instruction matters for signed zero and NANs.
+(define_insn_and_split "*ieee_minmax<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand")
+	   (match_operand:MODEF 2 "register_operand")
+	   (lt:MODEF
+	     (match_operand:MODEF 3 "register_operand")
+	     (match_operand:MODEF 4 "register_operand"))]
+	  UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+  && ((rtx_equal_p (operands[1], operands[3])
+       && rtx_equal_p (operands[2], operands[4]))
+      || (rtx_equal_p (operands[1], operands[4])
+	  && rtx_equal_p (operands[2], operands[3])))
+  && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  int u = (rtx_equal_p (operands[1], operands[3])
+	   && rtx_equal_p (operands[2], operands[4]))
+	   ? UNSPEC_IEEE_MAX : UNSPEC_IEEE_MIN;
+  emit_move_insn (operands[0],
+		  gen_rtx_UNSPEC (<MODE>mode,
+				  gen_rtvec (2, operands[2], operands[1]),
+				  u));
+  DONE;
+})
+
 ;; Make two stack loads independent:
 ;;   fld aa              fld aa
 ;;   fld %st(0)     ->   fld bb
diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
new file mode 100644
index 00000000000..1e9a781ca74
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr110170.C
@@ -0,0 +1,78 @@ 
+/* { dg-do run } */
+/* { dg-options " -O2 -march=x86-64 -mfpmath=sse -std=gnu++20" } */
+#include <math.h>
+
+void
+__attribute__((noinline))
+__cond_swap(double* __x, double* __y) {
+  bool __r = (*__x < *__y);
+  auto __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+auto test1() {
+    double nan = -0.0;
+    double x = 0.0;
+    __cond_swap(&nan, &x);
+    return x == -0.0 && nan == 0.0;
+}
+
+auto test1r() {
+    double nan = NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == 1.0;
+}
+
+auto test2() {
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test2r() {
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test3() {
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test3r() {
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test4() {
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+auto test4r() {
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+
+int main() {
+    if (
+        !test1() || !test1r()
+        || !test2() || !test2r()
+        || !test3() || !test4r()
+        || !test4() || !test4r()
+    ) __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
new file mode 100644
index 00000000000..0f98545cce3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options " -O2 -march=x86-64-v2 -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 } } */
+
+void __cond_swap_df(double* __x, double* __y) {
+  _Bool __r = (*__x < *__y);
+  double __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+void __cond_swap_sf(float* __x, float* __y) {
+  _Bool __r = (*__x < *__y);
+  float __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}