Message ID | 3cf55c98-d18a-d1ad-2fc2-015c63e217ca@suse.com |
---|---|
State | New |
Headers | show |
Series | x86: make better use of VPTERNLOG{D,Q} | expand |
On Wed, Jun 21, 2023 at 2:27 PM Jan Beulich via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > When it's the memory operand which is to be inverted, using VPANDN* > requires a further load instruction. The same can be achieved by a > single VPTERNLOG*. Add two new alternatives (for plain memory and > embedded broadcast), adjusting the predicate for the first operand > accordingly. > > Two pre-existing testcases actually end up being affected (improved) by > the change, which is reflected in updated expectations there. LGTM. > > gcc/ > > PR target/93768 > * config/i386/sse.md (*andnot<mode>3): Add new alternatives > for memory form operand 1. > > gcc/testsuite/ > > PR target/93768 > * gcc.target/i386/avx512f-andn-di-zmm-2.c: New test. > * gcc.target/i386/avx512f-andn-si-zmm-2.c: Adjust expecations > towards generated code. > * gcc.target/i386/pr100711-3.c: Adjust expectations for 32-bit > code. > > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -17210,11 +17210,13 @@ > "TARGET_AVX512F") > > (define_insn "*andnot<mode>3" > - [(set (match_operand:VI 0 "register_operand" "=x,x,v") > + [(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v") > (and:VI > - (not:VI (match_operand:VI 1 "vector_operand" "0,x,v")) > - (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))] > - "TARGET_SSE" > + (not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br")) > + (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))] > + "TARGET_SSE > + && (register_operand (operands[1], <MODE>mode) > + || register_operand (operands[2], <MODE>mode))" > { > char buf[64]; > const char *ops; > @@ -17281,6 +17283,15 @@ > case 2: > ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; > break; > + case 3: > + case 4: > + tmp = "pternlog"; > + ssesuffix = "<ternlogsuffix>"; > + if (which_alternative != 4 || TARGET_AVX512VL) > + ops = "v%s%s\t{$0x44, %%1, %%2, %%0|%%0, %%2, %%1, $0x44}"; > + else > + ops = "v%s%s\t{$0x44, %%g1, %%g2, %%g0|%%g0, %%g2, %%g1, $0x44}"; > + break; > default: > gcc_unreachable (); > } > @@ -17289,7 +17300,7 @@ > output_asm_insn (buf, operands); > return ""; > } > - [(set_attr "isa" "noavx,avx,avx") > + [(set_attr "isa" "noavx,avx,avx,*,*") > (set_attr "type" "sselog") > (set (attr "prefix_data16") > (if_then_else > @@ -17297,9 +17308,12 @@ > (eq_attr "mode" "TI")) > (const_string "1") > (const_string "*"))) > - (set_attr "prefix" "orig,vex,evex") > + (set_attr "prefix" "orig,vex,evex,evex,evex") > (set (attr "mode") > - (cond [(match_test "TARGET_AVX2") > + (cond [(and (eq_attr "alternative" "3,4") > + (match_test "<MODE_SIZE> < 64 && !TARGET_AVX512VL")) > + (const_string "XI") > + (match_test "TARGET_AVX2") > (const_string "<sseinsnmode>") > (match_test "TARGET_AVX") > (if_then_else > @@ -17310,7 +17324,15 @@ > (match_test "optimize_function_for_size_p (cfun)")) > (const_string "V4SF") > ] > - (const_string "<sseinsnmode>")))]) > + (const_string "<sseinsnmode>"))) > + (set (attr "enabled") > + (cond [(eq_attr "alternative" "3") > + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") > + (eq_attr "alternative" "4") > + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL > + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)") > + ] > + (const_string "*")))]) > > ;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn > (define_split > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ > +/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ > + > +#define type __m512i > +#define vec 512 > +#define op andnot > +#define suffix epi64 > +#define SCALAR long long > + > +#include "avx512-binop-2.h" > --- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c > @@ -1,7 +1,7 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > -/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } */ > -/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */ > +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ > +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ > > #define type __m512i > #define vec 512 > --- a/gcc/testsuite/gcc.target/i386/pr100711-3.c > +++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c > @@ -37,4 +37,6 @@ v8di foo_v8di (long long a, v8di b) > return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) & b; > } > > -/* { dg-final { scan-assembler-times "vpandn" 4 } } */ > +/* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */ >
--- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17210,11 +17210,13 @@ "TARGET_AVX512F") (define_insn "*andnot<mode>3" - [(set (match_operand:VI 0 "register_operand" "=x,x,v") + [(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v") (and:VI - (not:VI (match_operand:VI 1 "vector_operand" "0,x,v")) - (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))] - "TARGET_SSE" + (not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br")) + (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))] + "TARGET_SSE + && (register_operand (operands[1], <MODE>mode) + || register_operand (operands[2], <MODE>mode))" { char buf[64]; const char *ops; @@ -17281,6 +17283,15 @@ case 2: ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; + case 3: + case 4: + tmp = "pternlog"; + ssesuffix = "<ternlogsuffix>"; + if (which_alternative != 4 || TARGET_AVX512VL) + ops = "v%s%s\t{$0x44, %%1, %%2, %%0|%%0, %%2, %%1, $0x44}"; + else + ops = "v%s%s\t{$0x44, %%g1, %%g2, %%g0|%%g0, %%g2, %%g1, $0x44}"; + break; default: gcc_unreachable (); } @@ -17289,7 +17300,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx,avx") + [(set_attr "isa" "noavx,avx,avx,*,*") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -17297,9 +17308,12 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix" "orig,vex,evex,evex,evex") (set (attr "mode") - (cond [(match_test "TARGET_AVX2") + (cond [(and (eq_attr "alternative" "3,4") + (match_test "<MODE_SIZE> < 64 && !TARGET_AVX512VL")) + (const_string "XI") + (match_test "TARGET_AVX2") (const_string "<sseinsnmode>") (match_test "TARGET_AVX") (if_then_else @@ -17310,7 +17324,15 @@ (match_test "optimize_function_for_size_p (cfun)")) (const_string "V4SF") ] - (const_string "<sseinsnmode>")))]) + (const_string "<sseinsnmode>"))) + (set (attr "enabled") + (cond [(eq_attr "alternative" "3") + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") + (eq_attr "alternative" "4") + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)") + ] + (const_string "*")))]) ;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn (define_split --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ +/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ + +#define type __m512i +#define vec 512 +#define op andnot +#define suffix epi64 +#define SCALAR long long + +#include "avx512-binop-2.h" --- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ #define type __m512i #define vec 512 --- a/gcc/testsuite/gcc.target/i386/pr100711-3.c +++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c @@ -37,4 +37,6 @@ v8di foo_v8di (long long a, v8di b) return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) & b; } -/* { dg-final { scan-assembler-times "vpandn" 4 } } */ +/* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */