diff mbox series

[V2,x86] Add pre_reload splitter to detect fp min/max pattern.

Message ID 20230707052914.3386877-1-hongtao.liu@intel.com
State New
Headers show
Series [V2,x86] Add pre_reload splitter to detect fp min/max pattern. | expand

Commit Message

Liu, Hongtao July 7, 2023, 5:29 a.m. UTC
> Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
> and the other emitting UNSPEC_IEEE_MIN.
Split.

> The test involves blendv instruction, which is SSE4.1, so it is
> pointless to test it without -msse4.1. Please add -msse4.1 instead of
> -march=x86_64 and use sse4_runtime target selector, as is the case
> with gcc.target/i386/pr90358.c.
Changed.

> Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
> the test is valid also for 32bit targets, you should use -msseregparm
> additional options for ia32 (please see gcc.target/i386/pr43546.c
> testcase) in the same way as -mregparm to pass SSE arguments in
> registers.
The 32-bit target still fails to do condition elimination for DFmode due to
the code below in rtx_cost:

  /* A size N times larger than UNITS_PER_WORD likely needs N times as
     many insns, taking N times as long.  */
  factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;

This looks like a separate issue for DFmode operations on 32-bit targets.

I've enabled the testcase for 32-bit targets, but it currently only scans for
minss/maxss there.

Here's the updated patch.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

We have ix86_expand_sse_fp_minmax to detect min/max semantics, but
it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false. For
the testcase in the PR, there's an extra move from cmp_op0 to if_true,
so ix86_expand_sse_fp_minmax fails to match.

This patch adds a pre_reload splitter to detect the min/max pattern.

Operand order in MINSS matters for signed zeros and NaNs, since the
instruction always returns the second operand when either operand is a NaN or
both operands are zero.

gcc/ChangeLog:

	PR target/110170
	* config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload
	splitter to detect fp max pattern.
	(*ieee_min<mode>3_1): Ditto, but for fp min pattern.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr110170.C: New test.
	* gcc.target/i386/pr110170.c: New test.
---
 gcc/config/i386/i386.md                  | 43 +++++++++++++
 gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr110170.c | 21 +++++++
 3 files changed, 142 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c

Comments

Uros Bizjak July 7, 2023, 6:02 a.m. UTC | #1
On Fri, Jul 7, 2023 at 7:31 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> > Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
> > and the other emitting UNSPEC_IEEE_MIN.
> Splitted.
>
> > The test involves blendv instruction, which is SSE4.1, so it is
> > pointless to test it without -msse4.1. Please add -msse4.1 instead of
> > -march=x86_64 and use sse4_runtime target selector, as is the case
> > with gcc.target/i386/pr90358.c.
> Changed.
>
> > Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
> > the test is valid also for 32bit targets, you should use -msseregparm
> > additional options for ia32 (please see gcc.target/i386/pr43546.c
> > testcase) in the same way as -mregparm to pass SSE arguments in
> > registers.
> 32-bit target still failed to do condition elimination for DFmode due to
> below code in rtx_cost
>
>   /* A size N times larger than UNITS_PER_WORD likely needs N times as
>      many insns, taking N times as long.  */
>   factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;
>
> It looks like a separate issue for DFmode operation under 32-bit target.
>
> I've enable 32-bit for the testcase, but only scan for minss/maxss
> currently.
>
> Here's updated patch.
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> We have ix86_expand_sse_fp_minmax to detect min/max sematics, but
> it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
> the testcase in the PR, there's an extra move from cmp_op0 to if_true,
> and it failed ix86_expand_sse_fp_minmax.
>
> This patch adds pre_reload splitter to detect the min/max pattern.
>
> Operands order in MINSS matters for signed zero and NANs, since the
> instruction always returns second operand when any operand is NAN or
> both operands are zero.
>
> gcc/ChangeLog:
>
>         PR target/110170
>         * config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload
>         splitter to detect fp max pattern.
>         (*ieee_min<mode>3_1): Ditto, but for fp min pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.target/i386/pr110170.C: New test.
>         * gcc.target/i386/pr110170.c: New test.

OK with a testcase fix below.

Uros.

> ---
>  gcc/config/i386/i386.md                  | 43 +++++++++++++
>  gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr110170.c | 21 +++++++
>  3 files changed, 142 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index a82cc353cfd..6f415f899ae 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -23163,6 +23163,49 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
>     (set_attr "type" "sseadd")
>     (set_attr "mode" "<MODE>")])
>
> +;; Operands order in min/max instruction matters for signed zero and NANs.
> +(define_insn_and_split "*ieee_max<mode>3_1"
> +  [(set (match_operand:MODEF 0 "register_operand")
> +       (unspec:MODEF
> +         [(match_operand:MODEF 1 "register_operand")
> +          (match_operand:MODEF 2 "register_operand")
> +          (lt:MODEF
> +            (match_operand:MODEF 3 "register_operand")
> +            (match_operand:MODEF 4 "register_operand"))]
> +         UNSPEC_BLENDV))]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> +  && (rtx_equal_p (operands[1], operands[3])
> +      && rtx_equal_p (operands[2], operands[4]))
> +  && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +       (unspec:MODEF
> +         [(match_dup 2)
> +          (match_dup 1)]
> +        UNSPEC_IEEE_MAX))])
> +
> +(define_insn_and_split "*ieee_min<mode>3_1"
> +  [(set (match_operand:MODEF 0 "register_operand")
> +       (unspec:MODEF
> +         [(match_operand:MODEF 1 "register_operand")
> +          (match_operand:MODEF 2 "register_operand")
> +          (lt:MODEF
> +            (match_operand:MODEF 3 "register_operand")
> +            (match_operand:MODEF 4 "register_operand"))]
> +         UNSPEC_BLENDV))]
> +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> +  && (rtx_equal_p (operands[1], operands[4])
> +      && rtx_equal_p (operands[2], operands[3]))
> +  && ix86_pre_reload_split ()"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 0)
> +       (unspec:MODEF
> +         [(match_dup 2)
> +          (match_dup 1)]
> +        UNSPEC_IEEE_MIN))])
> +
>  ;; Make two stack loads independent:
>  ;;   fld aa              fld aa
>  ;;   fld %st(0)     ->   fld bb
> diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
> new file mode 100644
> index 00000000000..5d6842270d0
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr110170.C
> @@ -0,0 +1,78 @@
> +/* { dg-do run } */
> +/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */

Please either change the first line to:

{ dg-do run { target sse4_runtime } }

or add

{ dg-require-effective-target sse4_runtime }

to the runtime test.

> +#include <math.h>
> +
> +void
> +__attribute__((noinline))
> +__cond_swap(double* __x, double* __y) {
> +  bool __r = (*__x < *__y);
> +  auto __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> +
> +auto test1() {
> +    double nan = -0.0;
> +    double x = 0.0;
> +    __cond_swap(&nan, &x);
> +    return x == -0.0 && nan == 0.0;
> +}
> +
> +auto test1r() {
> +    double nan = NAN;
> +    double x = 1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 0 && nan == 1.0;
> +}
> +
> +auto test2() {
> +    double nan = NAN;
> +    double x = -1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> +}
> +
> +auto test2r() {
> +    double nan = NAN;
> +    double x = -1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> +}
> +
> +auto test3() {
> +    double nan = -NAN;
> +    double x = 1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> +}
> +
> +auto test3r() {
> +    double nan = -NAN;
> +    double x = 1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> +}
> +
> +auto test4() {
> +    double nan = -NAN;
> +    double x = -1.0;
> +    __cond_swap(&nan, &x);
> +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> +}
> +
> +auto test4r() {
> +    double nan = -NAN;
> +    double x = -1.0;
> +    __cond_swap(&x, &nan);
> +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> +}
> +
> +
> +int main() {
> +    if (
> +        !test1() || !test1r()
> +        || !test2() || !test2r()
> +        || !test3() || !test4r()
> +        || !test4() || !test4r()
> +    ) __builtin_abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
> new file mode 100644
> index 00000000000..c72f73398a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr110170.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
> +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
> +/* Ideally cond_swap_df is also optimized to minsd/maxsd.  */
> +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
> +
> +void __cond_swap_df(double* __x, double* __y) {
> +  _Bool __r = (*__x < *__y);
> +  double __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> +
> +void __cond_swap_sf(float* __x, float* __y) {
> +  _Bool __r = (*__x < *__y);
> +  float __tmp = __r ? *__x : *__y;
> +  *__y = __r ? *__y : *__x;
> +  *__x = __tmp;
> +}
> --
> 2.39.1.388.g2fc9e9ca3c
>
Hongtao Liu July 7, 2023, 6:41 a.m. UTC | #2
On Fri, Jul 7, 2023 at 2:02 PM Uros Bizjak via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Fri, Jul 7, 2023 at 7:31 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > > Please split the above pattern into two, one emitting UNSPEC_IEEE_MAX
> > > and the other emitting UNSPEC_IEEE_MIN.
> > Splitted.
> >
> > > The test involves blendv instruction, which is SSE4.1, so it is
> > > pointless to test it without -msse4.1. Please add -msse4.1 instead of
> > > -march=x86_64 and use sse4_runtime target selector, as is the case
> > > with gcc.target/i386/pr90358.c.
> > Changed.
> >
> > > Please also use -msse4.1 instead of -march here. With -mfpmath=sse,
> > > the test is valid also for 32bit targets, you should use -msseregparm
> > > additional options for ia32 (please see gcc.target/i386/pr43546.c
> > > testcase) in the same way as -mregparm to pass SSE arguments in
> > > registers.
> > 32-bit target still failed to do condition elimination for DFmode due to
> > below code in rtx_cost
> >
> >   /* A size N times larger than UNITS_PER_WORD likely needs N times as
> >      many insns, taking N times as long.  */
> >   factor = mode_size > UNITS_PER_WORD ? mode_size / UNITS_PER_WORD : 1;
> >
> > It looks like a separate issue for DFmode operation under 32-bit target.
> >
> > I've enable 32-bit for the testcase, but only scan for minss/maxss
> > currently.
> >
> > Here's updated patch.
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Ok for trunk?
> >
> > We have ix86_expand_sse_fp_minmax to detect min/max sematics, but
> > it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false, for
> > the testcase in the PR, there's an extra move from cmp_op0 to if_true,
> > and it failed ix86_expand_sse_fp_minmax.
> >
> > This patch adds pre_reload splitter to detect the min/max pattern.
> >
> > Operands order in MINSS matters for signed zero and NANs, since the
> > instruction always returns second operand when any operand is NAN or
> > both operands are zero.
> >
> > gcc/ChangeLog:
> >
> >         PR target/110170
> >         * config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload
> >         splitter to detect fp max pattern.
> >         (*ieee_min<mode>3_1): Ditto, but for fp min pattern.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * g++.target/i386/pr110170.C: New test.
> >         * gcc.target/i386/pr110170.c: New test.
>
> OK with a testcase fix below.
>
> Uros.
>
> > ---
> >  gcc/config/i386/i386.md                  | 43 +++++++++++++
> >  gcc/testsuite/g++.target/i386/pr110170.C | 78 ++++++++++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr110170.c | 21 +++++++
> >  3 files changed, 142 insertions(+)
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr110170.C
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr110170.c
> >
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index a82cc353cfd..6f415f899ae 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -23163,6 +23163,49 @@ (define_insn "*ieee_s<ieee_maxmin><mode>3"
> >     (set_attr "type" "sseadd")
> >     (set_attr "mode" "<MODE>")])
> >
> > +;; Operands order in min/max instruction matters for signed zero and NANs.
> > +(define_insn_and_split "*ieee_max<mode>3_1"
> > +  [(set (match_operand:MODEF 0 "register_operand")
> > +       (unspec:MODEF
> > +         [(match_operand:MODEF 1 "register_operand")
> > +          (match_operand:MODEF 2 "register_operand")
> > +          (lt:MODEF
> > +            (match_operand:MODEF 3 "register_operand")
> > +            (match_operand:MODEF 4 "register_operand"))]
> > +         UNSPEC_BLENDV))]
> > +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> > +  && (rtx_equal_p (operands[1], operands[3])
> > +      && rtx_equal_p (operands[2], operands[4]))
> > +  && ix86_pre_reload_split ()"
> > +  "#"
> > +  "&& 1"
> > +  [(set (match_dup 0)
> > +       (unspec:MODEF
> > +         [(match_dup 2)
> > +          (match_dup 1)]
> > +        UNSPEC_IEEE_MAX))])
> > +
> > +(define_insn_and_split "*ieee_min<mode>3_1"
> > +  [(set (match_operand:MODEF 0 "register_operand")
> > +       (unspec:MODEF
> > +         [(match_operand:MODEF 1 "register_operand")
> > +          (match_operand:MODEF 2 "register_operand")
> > +          (lt:MODEF
> > +            (match_operand:MODEF 3 "register_operand")
> > +            (match_operand:MODEF 4 "register_operand"))]
> > +         UNSPEC_BLENDV))]
> > +  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
> > +  && (rtx_equal_p (operands[1], operands[4])
> > +      && rtx_equal_p (operands[2], operands[3]))
> > +  && ix86_pre_reload_split ()"
> > +  "#"
> > +  "&& 1"
> > +  [(set (match_dup 0)
> > +       (unspec:MODEF
> > +         [(match_dup 2)
> > +          (match_dup 1)]
> > +        UNSPEC_IEEE_MIN))])
> > +
> >  ;; Make two stack loads independent:
> >  ;;   fld aa              fld aa
> >  ;;   fld %st(0)     ->   fld bb
> > diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
> > new file mode 100644
> > index 00000000000..5d6842270d0
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr110170.C
> > @@ -0,0 +1,78 @@
> > +/* { dg-do run } */
> > +/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */
>
> Please either change the first line to:
>
> { dg-do run { target sse4_runtime } }
>
> or add
>
> { dg-require-effective-target sse4_runtime }
>
> to the runtime test.
Assume it's  { dg-do run { target sse4 } } + runtime check for
processor support of sse4.
I've included "sse4_1-check.h" in the testcase and renamed the main to
sse4_1_test to integrate the test into the existing infrastructure.

>
> > +#include <math.h>
> > +
> > +void
> > +__attribute__((noinline))
> > +__cond_swap(double* __x, double* __y) {
> > +  bool __r = (*__x < *__y);
> > +  auto __tmp = __r ? *__x : *__y;
> > +  *__y = __r ? *__y : *__x;
> > +  *__x = __tmp;
> > +}
> > +
> > +auto test1() {
> > +    double nan = -0.0;
> > +    double x = 0.0;
> > +    __cond_swap(&nan, &x);
> > +    return x == -0.0 && nan == 0.0;
> > +}
> > +
> > +auto test1r() {
> > +    double nan = NAN;
> > +    double x = 1.0;
> > +    __cond_swap(&x, &nan);
> > +    return isnan(x) && signbit(x) == 0 && nan == 1.0;
> > +}
> > +
> > +auto test2() {
> > +    double nan = NAN;
> > +    double x = -1.0;
> > +    __cond_swap(&nan, &x);
> > +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> > +}
> > +
> > +auto test2r() {
> > +    double nan = NAN;
> > +    double x = -1.0;
> > +    __cond_swap(&x, &nan);
> > +    return isnan(x) && signbit(x) == 0 && nan == -1.0;
> > +}
> > +
> > +auto test3() {
> > +    double nan = -NAN;
> > +    double x = 1.0;
> > +    __cond_swap(&nan, &x);
> > +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> > +}
> > +
> > +auto test3r() {
> > +    double nan = -NAN;
> > +    double x = 1.0;
> > +    __cond_swap(&x, &nan);
> > +    return isnan(x) && signbit(x) == 1 && nan == 1.0;
> > +}
> > +
> > +auto test4() {
> > +    double nan = -NAN;
> > +    double x = -1.0;
> > +    __cond_swap(&nan, &x);
> > +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> > +}
> > +
> > +auto test4r() {
> > +    double nan = -NAN;
> > +    double x = -1.0;
> > +    __cond_swap(&x, &nan);
> > +    return isnan(x) && signbit(x) == 1 && nan == -1.0;
> > +}
> > +
> > +
> > +int main() {
> > +    if (
> > +        !test1() || !test1r()
> > +        || !test2() || !test2r()
> > +        || !test3() || !test4r()
> > +        || !test4() || !test4r()
> > +    ) __builtin_abort();
> > +}
> > diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
> > new file mode 100644
> > index 00000000000..c72f73398a1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr110170.c
> > @@ -0,0 +1,21 @@
> > +/* { dg-do compile } */
> > +/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
> > +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
> > +/* Ideally cond_swap_df is also optimized to minsd/maxsd.  */
> > +/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
> > +/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
> > +
> > +void __cond_swap_df(double* __x, double* __y) {
> > +  _Bool __r = (*__x < *__y);
> > +  double __tmp = __r ? *__x : *__y;
> > +  *__y = __r ? *__y : *__x;
> > +  *__x = __tmp;
> > +}
> > +
> > +void __cond_swap_sf(float* __x, float* __y) {
> > +  _Bool __r = (*__x < *__y);
> > +  float __tmp = __r ? *__x : *__y;
> > +  *__y = __r ? *__y : *__x;
> > +  *__x = __tmp;
> > +}
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a82cc353cfd..6f415f899ae 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23163,6 +23163,49 @@  (define_insn "*ieee_s<ieee_maxmin><mode>3"
    (set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+;; Operand order in min/max instructions matters for signed zeros and NaNs.
+(define_insn_and_split "*ieee_max<mode>3_1"  ;; pre-reload: UNSPEC_BLENDV masked by (op1 < op2) of its own inputs -> UNSPEC_IEEE_MAX
+  [(set (match_operand:MODEF 0 "register_operand")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand")
+	   (match_operand:MODEF 2 "register_operand")
+	   (lt:MODEF
+	     (match_operand:MODEF 3 "register_operand")
+	     (match_operand:MODEF 4 "register_operand"))]
+	  UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+  && (rtx_equal_p (operands[1], operands[3])
+      && rtx_equal_p (operands[2], operands[4]))
+  && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)  ;; replacement lists the inputs as (op2, op1)
+	(unspec:MODEF
+	  [(match_dup 2)
+	   (match_dup 1)]
+	 UNSPEC_IEEE_MAX))])
+
+(define_insn_and_split "*ieee_min<mode>3_1"  ;; pre-reload: UNSPEC_BLENDV masked by (op2 < op1) of its own inputs -> UNSPEC_IEEE_MIN
+  [(set (match_operand:MODEF 0 "register_operand")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand")
+	   (match_operand:MODEF 2 "register_operand")
+	   (lt:MODEF
+	     (match_operand:MODEF 3 "register_operand")
+	     (match_operand:MODEF 4 "register_operand"))]
+	  UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+  && (rtx_equal_p (operands[1], operands[4])
+      && rtx_equal_p (operands[2], operands[3]))
+  && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)  ;; replacement lists the inputs as (op2, op1)
+	(unspec:MODEF
+	  [(match_dup 2)
+	   (match_dup 1)]
+	 UNSPEC_IEEE_MIN))])
+
 ;; Make two stack loads independent:
 ;;   fld aa              fld aa
 ;;   fld %st(0)     ->   fld bb
diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
new file mode 100644
index 00000000000..5d6842270d0
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr110170.C
@@ -0,0 +1,78 @@ 
+/* { dg-do run { target sse4_runtime } } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */
+#include <math.h>
+
+void
+__attribute__((noinline))
+__cond_swap(double* __x, double* __y) {  /* Exchanges *__x and *__y unless *__x < *__y holds.  */
+  bool __r = (*__x < *__y);  /* One comparison drives both selections below.  */
+  auto __tmp = __r ? *__x : *__y;  /* Value that will end up in *__x.  */
+  *__y = __r ? *__y : *__x;  /* Reads the old *__x before it is overwritten.  */
+  *__x = __tmp;
+}
+
+auto test1() {  /* -0.0 < 0.0 is false, so the two zeros must trade places with their signs intact.  */
+    double nan = -0.0;
+    double x = 0.0;
+    __cond_swap(&nan, &x);
+    return x == 0.0 && signbit(x) == 1 && nan == 0.0 && signbit(nan) == 0;  /* == alone cannot tell -0.0 from +0.0.  */
+}
+
+auto test1r() {  /* NAN passed second with 1.0 first: expects x to become a NaN with sign bit clear and nan to become 1.0.  */
+    double nan = NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == 1.0;
+}
+
+auto test2() {  /* NAN passed first with -1.0 second: expects x to become a NaN with sign bit clear and nan to become -1.0.  */
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test2r() {  /* Same values as test2 with the argument order reversed; the expected result is the same.  */
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test3() {  /* -NAN passed first with 1.0 second: expects x to become a NaN with sign bit set and nan to become 1.0.  */
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test3r() {  /* Same values as test3 with the argument order reversed; the expected result is the same.  */
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test4() {  /* -NAN passed first with -1.0 second: expects x to become a NaN with sign bit set and nan to become -1.0.  */
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+auto test4r() {  /* Same values as test4 with the argument order reversed; the expected result is the same.  */
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+
+int main() {  /* Aborts unless every signed-zero/NaN swap check passes.  */
+    if (
+        !test1() || !test1r()
+        || !test2() || !test2r()
+        || !test3() || !test3r()
+        || !test4() || !test4r()
+    ) __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
new file mode 100644
index 00000000000..c72f73398a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
+/* Ideally cond_swap_df is also optimized to minsd/maxsd.  */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
+
+void __cond_swap_df(double* __x, double* __y) {  /* double variant: exchanges *__x and *__y unless *__x < *__y holds.  */
+  _Bool __r = (*__x < *__y);  /* One comparison drives both selections below.  */
+  double __tmp = __r ? *__x : *__y;  /* Value that will end up in *__x.  */
+  *__y = __r ? *__y : *__x;  /* Reads the old *__x before it is overwritten.  */
+  *__x = __tmp;
+}
+
+void __cond_swap_sf(float* __x, float* __y) {  /* float variant: exchanges *__x and *__y unless *__x < *__y holds.  */
+  _Bool __r = (*__x < *__y);  /* One comparison drives both selections below.  */
+  float __tmp = __r ? *__x : *__y;  /* Value that will end up in *__x.  */
+  *__y = __r ? *__y : *__x;  /* Reads the old *__x before it is overwritten.  */
+  *__x = __tmp;
+}