diff mbox series

Use {,v}blendvp{s,d} for [SD]Fmode sse_movcc (PR target/88189)

Message ID 20181128074335.GU12380@tucnak
State New
Headers show
Series Use {,v}blendvp{s,d} for [SD]Fmode sse_movcc (PR target/88189) | expand

Commit Message

Jakub Jelinek Nov. 28, 2018, 7:43 a.m. UTC
Hi!

This patch implements Marc's idea of using {,v}blendvp{s,d} for scalar
[SD]Fmode ix86_expand_sse_movcc for -msse4.1 and above.
Without this patch we emit sequences like
        andpd   %xmm2, %xmm0
        andnpd  %xmm1, %xmm2
        orpd    %xmm2, %xmm0
or
        andps   %xmm2, %xmm0
        andnps  %xmm1, %xmm2
        orps    %xmm2, %xmm0
and this replaces it with
        blendvpd        %xmm0, %xmm2, %xmm1
        movapd  %xmm1, %xmm0
or
        blendvps        %xmm0, %xmm2, %xmm1
        movaps  %xmm1, %xmm0
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-11-28  Jakub Jelinek  <jakub@redhat.com>

	PR target/88189
	* config/i386/i386.c (ix86_expand_sse_movcc): Handle DFmode and
	SFmode using sse4_1_blendvs[sd] with TARGET_SSE4_1.  Formatting fixes.
	* config/i386/sse.md (sse4_1_blendv<ssemodesuffix>): New pattern.

	* gcc.target/i386/sse4_1-pr88189-1.c: New test.
	* gcc.target/i386/sse4_1-pr88189-2.c: New test.
	* gcc.target/i386/avx-pr88189-1.c: New test.
	* gcc.target/i386/avx-pr88189-2.c: New test.


	Jakub

Comments

Uros Bizjak Nov. 28, 2018, 8:01 a.m. UTC | #1
On Wed, Nov 28, 2018 at 8:43 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> This patch implements Marc's idea of using {,v}blendvp{s,d} for scalar
> [SD]Fmode ix86_expand_sse_movcc for -msse4.1 and above.
> Without this patch we emit sequences like
>         andpd   %xmm2, %xmm0
>         andnpd  %xmm1, %xmm2
>         orpd    %xmm2, %xmm0
> or
>         andps   %xmm2, %xmm0
>         andnps  %xmm1, %xmm2
>         orps    %xmm2, %xmm0
> and this replaces it with
>         blendvpd        %xmm0, %xmm2, %xmm1
>         movapd  %xmm1, %xmm0
> or
>         blendvps        %xmm0, %xmm2, %xmm1
>         movaps  %xmm1, %xmm0
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2018-11-28  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/88189
>         * config/i386/i386.c (ix86_expand_sse_movcc): Handle DFmode and
>         SFmode using sse4_1_blendvs[sd] with TARGET_SSE4_1.  Formatting fixes.
>         * config/i386/sse.md (sse4_1_blendv<ssemodesuffix>): New pattern.
>
>         * gcc.target/i386/sse4_1-pr88189-1.c: New test.
>         * gcc.target/i386/sse4_1-pr88189-2.c: New test.
>         * gcc.target/i386/avx-pr88189-1.c: New test.
>         * gcc.target/i386/avx-pr88189-2.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/i386.c.jj   2018-11-26 22:25:50.716253308 +0100
> +++ gcc/config/i386/i386.c      2018-11-27 11:18:23.135715272 +0100
> @@ -23585,15 +23585,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
>      {
>        emit_insn (gen_rtx_SET (dest, cmp));
>      }
> -  else if (op_false == CONST0_RTX (mode)
> -      && !maskcmp)
> +  else if (op_false == CONST0_RTX (mode) && !maskcmp)
>      {
>        op_true = force_reg (mode, op_true);
>        x = gen_rtx_AND (mode, cmp, op_true);
>        emit_insn (gen_rtx_SET (dest, x));
>      }
> -  else if (op_true == CONST0_RTX (mode)
> -      && !maskcmp)
> +  else if (op_true == CONST0_RTX (mode) && !maskcmp)
>      {
>        op_false = force_reg (mode, op_false);
>        x = gen_rtx_NOT (mode, cmp);
> @@ -23601,14 +23599,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
>        emit_insn (gen_rtx_SET (dest, x));
>      }
>    else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
> -      && !maskcmp)
> +          && !maskcmp)
>      {
>        op_false = force_reg (mode, op_false);
>        x = gen_rtx_IOR (mode, cmp, op_false);
>        emit_insn (gen_rtx_SET (dest, x));
>      }
> -  else if (TARGET_XOP
> -      && !maskcmp)
> +  else if (TARGET_XOP && !maskcmp)
>      {
>        op_true = force_reg (mode, op_true);
>
> @@ -23639,6 +23636,20 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
>           if (TARGET_SSE4_1)
>             gen = gen_sse4_1_blendvpd;
>           break;
> +       case E_SFmode:
> +         if (TARGET_SSE4_1)
> +           {
> +             gen = gen_sse4_1_blendvss;
> +             op_true = force_reg (mode, op_true);
> +           }
> +         break;
> +       case E_DFmode:
> +         if (TARGET_SSE4_1)
> +           {
> +             gen = gen_sse4_1_blendvsd;
> +             op_true = force_reg (mode, op_true);
> +           }
> +         break;
>         case E_V16QImode:
>         case E_V8HImode:
>         case E_V4SImode:
> --- gcc/config/i386/sse.md.jj   2018-11-21 17:39:51.000000000 +0100
> +++ gcc/config/i386/sse.md      2018-11-27 10:48:38.500120925 +0100
> @@ -15641,6 +15641,46 @@ (define_insn "<sse4_1>_blendv<ssemodesuf
>     (set_attr "btver2_decode" "vector,vector,vector")
>     (set_attr "mode" "<MODE>")])
>
> +;; Also define scalar versions.  These are used for conditional move.
> +;; Using subregs into vector modes causes register allocation lossage.
> +;; These patterns do not allow memory operands because the native
> +;; instructions read the full 128-bits.
> +
> +(define_insn "sse4_1_blendv<ssemodesuffix>"
> +  [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
> +       (unspec:MODEF
> +         [(match_operand:MODEF 1 "register_operand" "0,0,x")
> +          (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
> +          (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
> +         UNSPEC_BLENDV))]
> +  "TARGET_SSE4_1"
> +{
> +  if (get_attr_mode (insn) == MODE_V4SF)
> +    return (which_alternative == 2
> +           ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> +           : "blendvps\t{%3, %2, %0|%0, %2, %3}");
> +  else
> +    return (which_alternative == 2
> +           ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> +           : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
> +}
> +  [(set_attr "isa" "noavx,noavx,avx")
> +   (set_attr "type" "ssemov")
> +   (set_attr "length_immediate" "1")
> +   (set_attr "prefix_data16" "1,1,*")
> +   (set_attr "prefix_extra" "1")
> +   (set_attr "prefix" "orig,orig,vex")
> +   (set_attr "btver2_decode" "vector,vector,vector")
> +   (set (attr "mode")
> +       (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
> +                (const_string "V4SF")
> +              (match_test "TARGET_AVX")
> +                (const_string "<ssevecmode>")
> +              (match_test "optimize_function_for_size_p (cfun)")
> +                (const_string "V4SF")
> +              ]
> +              (const_string "<ssevecmode>")))])
> +
>  (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
>    [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
>         (unspec:VF_128_256
> --- gcc/testsuite/gcc.target/i386/sse4_1-pr88189-1.c.jj 2018-11-27 11:00:34.746322991 +0100
> +++ gcc/testsuite/gcc.target/i386/sse4_1-pr88189-1.c    2018-11-27 11:11:36.116423601 +0100
> @@ -0,0 +1,35 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target sse4 } */
> +/* { dg-options "-O2 -msse4.1 -mfpmath=sse" } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "sse4_1-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST sse4_1_test
> +#endif
> +
> +#include CHECK_H
> +
> +__attribute__((noipa)) double
> +f1 (double a, double b)
> +{
> +  return a < 0 ? a : b;
> +}
> +
> +__attribute__((noipa)) float
> +f2 (float a, float b)
> +{
> +  return a < 0 ? a : b;
> +}
> +
> +static void
> +TEST (void)
> +{
> +  if (f1 (5.0, 7.0) != 7.0
> +      || f1 (-2.0, 7.0) != -2.0
> +      || f2 (1.0f, 2.0f) != 2.0f
> +      || f2 (-1.0f, -3.0f) != -1.0f)
> +    abort ();
> +}
> --- gcc/testsuite/gcc.target/i386/sse4_1-pr88189-2.c.jj 2018-11-27 11:06:39.842306204 +0100
> +++ gcc/testsuite/gcc.target/i386/sse4_1-pr88189-2.c    2018-11-27 11:11:26.536581478 +0100
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1 -mno-avx -mfpmath=sse" } */
> +
> +__attribute__((noipa)) double
> +f1 (double a, double b)
> +{
> +  return a < 0 ? a : b;
> +}
> +
> +__attribute__((noipa)) float
> +f2 (float a, float b)
> +{
> +  return a < 0 ? a : b;
> +}
> +
> +/* { dg-final { scan-assembler-times "blendvp\[sd]" 2 } } */
> --- gcc/testsuite/gcc.target/i386/avx-pr88189-1.c.jj    2018-11-27 11:01:28.998428914 +0100
> +++ gcc/testsuite/gcc.target/i386/avx-pr88189-1.c       2018-11-27 11:11:49.642200671 +0100
> @@ -0,0 +1,8 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target avx } */
> +/* { dg-options "-O2 -mavx -mfpmath=sse" } */
> +
> +#define CHECK_H "avx-check.h"
> +#define TEST avx_test
> +
> +#include "sse4_1-pr88189-1.c"
> --- gcc/testsuite/gcc.target/i386/avx-pr88189-2.c.jj    2018-11-27 11:08:12.677776273 +0100
> +++ gcc/testsuite/gcc.target/i386/avx-pr88189-2.c       2018-11-27 11:11:13.726792579 +0100
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx -mfpmath=sse" } */
> +
> +__attribute__((noipa)) double
> +f1 (double a, double b)
> +{
> +  return a < 0 ? a : b;
> +}
> +
> +__attribute__((noipa)) float
> +f2 (float a, float b)
> +{
> +  return a < 0 ? a : b;
> +}
> +
> +/* { dg-final { scan-assembler-times "vblendvp\[sd]" 2 } } */
>
>         Jakub
diff mbox series

Patch

--- gcc/config/i386/i386.c.jj	2018-11-26 22:25:50.716253308 +0100
+++ gcc/config/i386/i386.c	2018-11-27 11:18:23.135715272 +0100
@@ -23585,15 +23585,13 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp
     {
       emit_insn (gen_rtx_SET (dest, cmp));
     }
-  else if (op_false == CONST0_RTX (mode)
-      && !maskcmp)
+  else if (op_false == CONST0_RTX (mode) && !maskcmp)
     {
       op_true = force_reg (mode, op_true);
       x = gen_rtx_AND (mode, cmp, op_true);
       emit_insn (gen_rtx_SET (dest, x));
     }
-  else if (op_true == CONST0_RTX (mode)
-      && !maskcmp)
+  else if (op_true == CONST0_RTX (mode) && !maskcmp)
     {
       op_false = force_reg (mode, op_false);
       x = gen_rtx_NOT (mode, cmp);
@@ -23601,14 +23599,13 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp
       emit_insn (gen_rtx_SET (dest, x));
     }
   else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
-      && !maskcmp)
+	   && !maskcmp)
     {
       op_false = force_reg (mode, op_false);
       x = gen_rtx_IOR (mode, cmp, op_false);
       emit_insn (gen_rtx_SET (dest, x));
     }
-  else if (TARGET_XOP
-      && !maskcmp)
+  else if (TARGET_XOP && !maskcmp)
     {
       op_true = force_reg (mode, op_true);
 
@@ -23639,6 +23636,20 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp
 	  if (TARGET_SSE4_1)
 	    gen = gen_sse4_1_blendvpd;
 	  break;
+	case E_SFmode:
+	  if (TARGET_SSE4_1)
+	    {
+	      gen = gen_sse4_1_blendvss;
+	      op_true = force_reg (mode, op_true);
+	    }
+	  break;
+	case E_DFmode:
+	  if (TARGET_SSE4_1)
+	    {
+	      gen = gen_sse4_1_blendvsd;
+	      op_true = force_reg (mode, op_true);
+	    }
+	  break;
 	case E_V16QImode:
 	case E_V8HImode:
 	case E_V4SImode:
--- gcc/config/i386/sse.md.jj	2018-11-21 17:39:51.000000000 +0100
+++ gcc/config/i386/sse.md	2018-11-27 10:48:38.500120925 +0100
@@ -15641,6 +15641,46 @@  (define_insn "<sse4_1>_blendv<ssemodesuf
    (set_attr "btver2_decode" "vector,vector,vector") 
    (set_attr "mode" "<MODE>")])
 
+;; Also define scalar versions.  These are used for conditional move.
+;; Using subregs into vector modes causes register allocation lossage.
+;; These patterns do not allow memory operands because the native
+;; instructions read the full 128-bits.
+
+(define_insn "sse4_1_blendv<ssemodesuffix>"
+  [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "0,0,x")
+	   (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
+	   (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
+	  UNSPEC_BLENDV))]
+  "TARGET_SSE4_1"
+{
+  if (get_attr_mode (insn) == MODE_V4SF)
+    return (which_alternative == 2
+	    ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+	    : "blendvps\t{%3, %2, %0|%0, %2, %3}");
+  else
+    return (which_alternative == 2
+	    ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+	    : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
+}
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix_data16" "1,1,*")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "btver2_decode" "vector,vector,vector") 
+   (set (attr "mode")
+	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (const_string "V4SF")
+	       (match_test "TARGET_AVX")
+		 (const_string "<ssevecmode>")
+	       (match_test "optimize_function_for_size_p (cfun)")
+		 (const_string "V4SF")
+	       ]
+	       (const_string "<ssevecmode>")))])
+
 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
 	(unspec:VF_128_256
--- gcc/testsuite/gcc.target/i386/sse4_1-pr88189-1.c.jj	2018-11-27 11:00:34.746322991 +0100
+++ gcc/testsuite/gcc.target/i386/sse4_1-pr88189-1.c	2018-11-27 11:11:36.116423601 +0100
@@ -0,0 +1,35 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1 -mfpmath=sse" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+__attribute__((noipa)) double
+f1 (double a, double b)
+{
+  return a < 0 ? a : b;
+}
+
+__attribute__((noipa)) float
+f2 (float a, float b)
+{
+  return a < 0 ? a : b;
+}
+
+static void
+TEST (void)
+{
+  if (f1 (5.0, 7.0) != 7.0
+      || f1 (-2.0, 7.0) != -2.0
+      || f2 (1.0f, 2.0f) != 2.0f
+      || f2 (-1.0f, -3.0f) != -1.0f)
+    abort ();
+}
--- gcc/testsuite/gcc.target/i386/sse4_1-pr88189-2.c.jj	2018-11-27 11:06:39.842306204 +0100
+++ gcc/testsuite/gcc.target/i386/sse4_1-pr88189-2.c	2018-11-27 11:11:26.536581478 +0100
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1 -mno-avx -mfpmath=sse" } */
+
+__attribute__((noipa)) double
+f1 (double a, double b)
+{
+  return a < 0 ? a : b;
+}
+
+__attribute__((noipa)) float
+f2 (float a, float b)
+{
+  return a < 0 ? a : b;
+}
+
+/* { dg-final { scan-assembler-times "blendvp\[sd]" 2 } } */
--- gcc/testsuite/gcc.target/i386/avx-pr88189-1.c.jj	2018-11-27 11:01:28.998428914 +0100
+++ gcc/testsuite/gcc.target/i386/avx-pr88189-1.c	2018-11-27 11:11:49.642200671 +0100
@@ -0,0 +1,8 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx -mfpmath=sse" } */
+
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+
+#include "sse4_1-pr88189-1.c"
--- gcc/testsuite/gcc.target/i386/avx-pr88189-2.c.jj	2018-11-27 11:08:12.677776273 +0100
+++ gcc/testsuite/gcc.target/i386/avx-pr88189-2.c	2018-11-27 11:11:13.726792579 +0100
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx -mfpmath=sse" } */
+
+__attribute__((noipa)) double
+f1 (double a, double b)
+{
+  return a < 0 ? a : b;
+}
+
+__attribute__((noipa)) float
+f2 (float a, float b)
+{
+  return a < 0 ? a : b;
+}
+
+/* { dg-final { scan-assembler-times "vblendvp\[sd]" 2 } } */