diff mbox series

[2/5] x86: use VPTERNLOG also for certain andnot forms

Message ID 3cf55c98-d18a-d1ad-2fc2-015c63e217ca@suse.com
State New
Headers show
Series x86: make better use of VPTERNLOG{D,Q} | expand

Commit Message

Jan Beulich June 21, 2023, 6:27 a.m. UTC
When it's the memory operand which is to be inverted, using VPANDN*
requires a further load instruction. The same can be achieved by a
single VPTERNLOG*. Add two new alternatives (for plain memory and
embedded broadcast), adjusting the predicate for the first operand
accordingly.

Two pre-existing testcases actually end up being affected (improved) by
the change, which is reflected in updated expectations there.

gcc/

	PR target/93768
	* config/i386/sse.md (*andnot<mode>3): Add new alternatives
	for memory form operand 1.

gcc/testsuite/

	PR target/93768
	* gcc.target/i386/avx512f-andn-di-zmm-2.c: New test.
	* gcc.target/i386/avx512f-andn-si-zmm-2.c: Adjust expecations
	towards generated code.
	* gcc.target/i386/pr100711-3.c: Adjust expectations for 32-bit
	code.

Comments

Hongtao Liu June 25, 2023, 4:58 a.m. UTC | #1
On Wed, Jun 21, 2023 at 2:27 PM Jan Beulich via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> When it's the memory operand which is to be inverted, using VPANDN*
> requires a further load instruction. The same can be achieved by a
> single VPTERNLOG*. Add two new alternatives (for plain memory and
> embedded broadcast), adjusting the predicate for the first operand
> accordingly.
>
> Two pre-existing testcases actually end up being affected (improved) by
> the change, which is reflected in updated expectations there.
LGTM.
>
> gcc/
>
>         PR target/93768
>         * config/i386/sse.md (*andnot<mode>3): Add new alternatives
>         for memory form operand 1.
>
> gcc/testsuite/
>
>         PR target/93768
>         * gcc.target/i386/avx512f-andn-di-zmm-2.c: New test.
>         * gcc.target/i386/avx512f-andn-si-zmm-2.c: Adjust expecations
>         towards generated code.
>         * gcc.target/i386/pr100711-3.c: Adjust expectations for 32-bit
>         code.
>
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -17210,11 +17210,13 @@
>    "TARGET_AVX512F")
>
>  (define_insn "*andnot<mode>3"
> -  [(set (match_operand:VI 0 "register_operand" "=x,x,v")
> +  [(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v")
>         (and:VI
> -         (not:VI (match_operand:VI 1 "vector_operand" "0,x,v"))
> -         (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
> -  "TARGET_SSE"
> +         (not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br"))
> +         (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))]
> +  "TARGET_SSE
> +   && (register_operand (operands[1], <MODE>mode)
> +       || register_operand (operands[2], <MODE>mode))"
>  {
>    char buf[64];
>    const char *ops;
> @@ -17281,6 +17283,15 @@
>      case 2:
>        ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
>        break;
> +    case 3:
> +    case 4:
> +      tmp = "pternlog";
> +      ssesuffix = "<ternlogsuffix>";
> +      if (which_alternative != 4 || TARGET_AVX512VL)
> +       ops = "v%s%s\t{$0x44, %%1, %%2, %%0|%%0, %%2, %%1, $0x44}";
> +      else
> +       ops = "v%s%s\t{$0x44, %%g1, %%g2, %%g0|%%g0, %%g2, %%g1, $0x44}";
> +      break;
>      default:
>        gcc_unreachable ();
>      }
> @@ -17289,7 +17300,7 @@
>    output_asm_insn (buf, operands);
>    return "";
>  }
> -  [(set_attr "isa" "noavx,avx,avx")
> +  [(set_attr "isa" "noavx,avx,avx,*,*")
>     (set_attr "type" "sselog")
>     (set (attr "prefix_data16")
>       (if_then_else
> @@ -17297,9 +17308,12 @@
>             (eq_attr "mode" "TI"))
>         (const_string "1")
>         (const_string "*")))
> -   (set_attr "prefix" "orig,vex,evex")
> +   (set_attr "prefix" "orig,vex,evex,evex,evex")
>     (set (attr "mode")
> -       (cond [(match_test "TARGET_AVX2")
> +       (cond [(and (eq_attr "alternative" "3,4")
> +                   (match_test "<MODE_SIZE> < 64 && !TARGET_AVX512VL"))
> +                (const_string "XI")
> +              (match_test "TARGET_AVX2")
>                  (const_string "<sseinsnmode>")
>                (match_test "TARGET_AVX")
>                  (if_then_else
> @@ -17310,7 +17324,15 @@
>                     (match_test "optimize_function_for_size_p (cfun)"))
>                  (const_string "V4SF")
>               ]
> -             (const_string "<sseinsnmode>")))])
> +             (const_string "<sseinsnmode>")))
> +   (set (attr "enabled")
> +       (cond [(eq_attr "alternative" "3")
> +                (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
> +              (eq_attr "alternative" "4")
> +                (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL
> +                             || (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
> +             ]
> +             (const_string "*")))])
>
>  ;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn
>  (define_split
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
> +/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
> +/* { dg-final { scan-assembler-not "vpbroadcast" } } */
> +
> +#define type __m512i
> +#define vec 512
> +#define op andnot
> +#define suffix epi64
> +#define SCALAR long long
> +
> +#include "avx512-binop-2.h"
> --- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
> @@ -1,7 +1,7 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512f -O2" } */
> -/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } */
> -/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */
> +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
> +/* { dg-final { scan-assembler-not "vpbroadcast" } } */
>
>  #define type __m512i
>  #define vec 512
> --- a/gcc/testsuite/gcc.target/i386/pr100711-3.c
> +++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c
> @@ -37,4 +37,6 @@ v8di foo_v8di (long long a, v8di b)
>      return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) & b;
>  }
>
> -/* { dg-final { scan-assembler-times "vpandn" 4 } } */
> +/* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */
>
diff mbox series

Patch

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17210,11 +17210,13 @@ 
   "TARGET_AVX512F")
 
 (define_insn "*andnot<mode>3"
-  [(set (match_operand:VI 0 "register_operand" "=x,x,v")
+  [(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v")
 	(and:VI
-	  (not:VI (match_operand:VI 1 "vector_operand" "0,x,v"))
-	  (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
-  "TARGET_SSE"
+	  (not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br"))
+	  (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))]
+  "TARGET_SSE
+   && (register_operand (operands[1], <MODE>mode)
+       || register_operand (operands[2], <MODE>mode))"
 {
   char buf[64];
   const char *ops;
@@ -17281,6 +17283,15 @@ 
     case 2:
       ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
       break;
+    case 3:
+    case 4:
+      tmp = "pternlog";
+      ssesuffix = "<ternlogsuffix>";
+      if (which_alternative != 4 || TARGET_AVX512VL)
+	ops = "v%s%s\t{$0x44, %%1, %%2, %%0|%%0, %%2, %%1, $0x44}";
+      else
+	ops = "v%s%s\t{$0x44, %%g1, %%g2, %%g0|%%g0, %%g2, %%g1, $0x44}";
+      break;
     default:
       gcc_unreachable ();
     }
@@ -17289,7 +17300,7 @@ 
   output_asm_insn (buf, operands);
   return "";
 }
-  [(set_attr "isa" "noavx,avx,avx")
+  [(set_attr "isa" "noavx,avx,avx,*,*")
    (set_attr "type" "sselog")
    (set (attr "prefix_data16")
      (if_then_else
@@ -17297,9 +17308,12 @@ 
 	    (eq_attr "mode" "TI"))
        (const_string "1")
        (const_string "*")))
-   (set_attr "prefix" "orig,vex,evex")
+   (set_attr "prefix" "orig,vex,evex,evex,evex")
    (set (attr "mode")
-	(cond [(match_test "TARGET_AVX2")
+	(cond [(and (eq_attr "alternative" "3,4")
+		    (match_test "<MODE_SIZE> < 64 && !TARGET_AVX512VL"))
+		 (const_string "XI")
+	       (match_test "TARGET_AVX2")
 		 (const_string "<sseinsnmode>")
 	       (match_test "TARGET_AVX")
 		 (if_then_else
@@ -17310,7 +17324,15 @@ 
 		    (match_test "optimize_function_for_size_p (cfun)"))
 		 (const_string "V4SF")
 	      ]
-	      (const_string "<sseinsnmode>")))])
+	      (const_string "<sseinsnmode>")))
+   (set (attr "enabled")
+	(cond [(eq_attr "alternative" "3")
+		 (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
+	       (eq_attr "alternative" "4")
+		 (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL
+			      || (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
+	      ]
+	      (const_string "*")))])
 
 ;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn
 (define_split
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vpbroadcast" } } */
+
+#define type __m512i
+#define vec 512
+#define op andnot
+#define suffix epi64
+#define SCALAR long long
+
+#include "avx512-binop-2.h"
--- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
@@ -1,7 +1,7 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vpbroadcast" } } */
 
 #define type __m512i
 #define vec 512
--- a/gcc/testsuite/gcc.target/i386/pr100711-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c
@@ -37,4 +37,6 @@  v8di foo_v8di (long long a, v8di b)
     return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) & b;
 }
 
-/* { dg-final { scan-assembler-times "vpandn" 4 } } */
+/* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */