@@ -1192,12 +1192,18 @@ (define_predicate "float_vector_all_ones_operand"
return false;
})
-/* Return true if operand is a vector constant that is all ones. */
+/* Return true if operand is an integral vector constant that is all ones. */
(define_predicate "vector_all_ones_operand"
(and (match_code "const_vector")
(match_test "INTEGRAL_MODE_P (GET_MODE (op))")
(match_test "op == CONSTM1_RTX (GET_MODE (op))")))
+/* Return true if operand is a vector constant that is all ones. */
+(define_predicate "int_float_vector_all_ones_operand"
+ (ior (match_operand 0 "vector_all_ones_operand")
+ (match_operand 0 "float_vector_all_ones_operand")
+ (match_test "op == constm1_rtx")))
+
/* Return true if operand is an 128/256bit all ones vector
that zero-extends to 256/512bit. */
(define_predicate "vector_all_ones_zero_extend_half_operand"
@@ -1382,6 +1382,29 @@ (define_insn "mov<mode>_internal"
]
(symbol_ref "true")))])
+; False dependency happens on destination register which is not really
+; used when moving all ones to vector register
+(define_split
+ [(set (match_operand:VMOVE 0 "register_operand")
+ (match_operand:VMOVE 1 "int_float_vector_all_ones_operand"))]
+ "TARGET_AVX512F && reload_completed
+ && (<MODE_SIZE> == 64 || EXT_REX_SSE_REG_P (operands[0]))
+ && optimize_insn_for_speed_p ()"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel
+ [(set (match_dup 0) (match_dup 1))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+ "operands[2] = CONST0_RTX (<MODE>mode);")
+
+(define_insn "*vmov<mode>_constm1_pternlog_false_dep"
+ [(set (match_operand:VMOVE 0 "register_operand" "=v")
+ (match_operand:VMOVE 1 "int_float_vector_all_ones_operand" "<sseconstm1>"))
+ (unspec [(match_operand:VMOVE 2 "register_operand" "0")] UNSPEC_INSN_FALSE_DEP)]
+ "TARGET_AVX512VL || <MODE_SIZE> == 64"
+ "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "evex")])
+
;; If mem_addr points to a memory region with less than whole vector size bytes
;; of accessible memory and k is a mask that would prevent reading the inaccessible
;; bytes from mem_addr, add UNSPEC_MASKLOAD to prevent it to be transformed to vpblendd
@@ -9336,7 +9359,7 @@ (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
operands[3] = CONST0_RTX (<MODE>mode);
}")
-(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
+(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>"
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VI48_AVX512VL
(match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
@@ -9346,11 +9369,35 @@ (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
"@
vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
+ "&& !TARGET_AVX512DQ && reload_completed
+ && optimize_function_for_speed_p (cfun)"
+ [(set (match_dup 0) (match_dup 4))
+ (parallel
+ [(set (match_dup 0)
+ (vec_merge:VI48_AVX512VL
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+ "operands[4] = CONST0_RTX (<MODE>mode);"
[(set_attr "isa" "avx512dq,*")
(set_attr "length_immediate" "0,1")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>_pternlog_false_dep"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
+ (match_operand:VI48_AVX512VL 3 "const0_operand")
+ (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))
+ (unspec [(match_operand:VI48_AVX512VL 4 "register_operand" "0")] UNSPEC_INSN_FALSE_DEP)]
+ "TARGET_AVX512F && !TARGET_AVX512DQ"
+ "vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
+ [(set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "extendv2sfv2df2"
[(set (match_operand:V2DF 0 "register_operand")
(float_extend:V2DF
@@ -17166,14 +17213,82 @@ (define_expand "one_cmpl<mode>2"
operands[2] = force_reg (<MODE>mode, operands[2]);
})
-(define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
- [(set (match_operand:VI 0 "register_operand" "=v,v")
- (xor:VI (match_operand:VI 1 "bcst_vector_operand" "vBr,m")
- (match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
+(define_insn_and_split "<mask_codefor>one_cmpl<mode>2<mask_name>"
+ [(set (match_operand:VI 0 "register_operand" "=v,v,v")
+ (xor:VI (match_operand:VI 1 "bcst_vector_operand" " 0, m,Br")
+ (match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC")))]
"TARGET_AVX512F
&& (!<mask_applied>
|| <ssescalarmode>mode == SImode
|| <ssescalarmode>mode == DImode)"
+{
+ if (!<mask_applied> && which_alternative
+ && optimize_insn_for_speed_p ())
+ return "#";
+
+ if (TARGET_AVX512VL)
+ return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
+ else
+ return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
+}
+ "&& reload_completed && !REG_P (operands[1]) && !<mask_applied>
+ && optimize_insn_for_speed_p ()"
+ [(set (match_dup 0) (match_dup 3))
+ (parallel
+ [(set (match_dup 0)
+ (xor:VI (match_dup 1) (match_dup 2)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+{
+ if (MEM_P (operands[1]))
+ {
+ operands[3] = operands[1];
+ operands[1] = operands[0];
+ }
+ else
+ {
+ if (GET_MODE_SIZE (<ssescalarmode>mode) < 4)
+ {
+ if (<MODE_SIZE> == 64 ? TARGET_AVX512BW
+ : (TARGET_AVX512BW && TARGET_AVX512VL)
+ || !EXT_REX_SSE_REG_P (operands[0]))
+ {
+ operands[3] = operands[1];
+ operands[1] = operands[0];
+ }
+ else
+ operands[3] = CONST0_RTX (<MODE>mode);
+ }
+ else
+ {
+ if (<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || !EXT_REX_SSE_REG_P (operands[0]))
+ {
+ operands[3] = operands[1];
+ operands[1] = operands[0];
+ }
+ else
+ operands[3] = CONST0_RTX (<MODE>mode);
+ }
+ }
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX512VL")
+ (const_string "<sseinsnmode>")
+ (const_string "XI")))
+ (set (attr "enabled")
+ (if_then_else (eq_attr "alternative" "1")
+ (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
+ (const_int 1)))])
+
+(define_insn "*one_cmpl<mode>2_pternlog_false_dep"
+ [(set (match_operand:VI 0 "register_operand" "=v,v,v")
+ (xor:VI (match_operand:VI 1 "bcst_vector_operand" " 0, m,Br")
+ (match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC")))
+ (unspec [(match_operand:VI 3 "register_operand" "0,0,0")]
+ UNSPEC_INSN_FALSE_DEP)]
+ "TARGET_AVX512F"
{
if (TARGET_AVX512VL)
return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
@@ -17226,7 +17341,7 @@ (define_insn "*andnot<mode>3"
[(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v")
(and:VI
(not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br"))
- (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))]
+ (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,0,0")))]
"TARGET_SSE
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
@@ -17685,8 +17800,8 @@ (define_insn "*iornot<mode>3"
[(set (match_operand:VI 0 "register_operand" "=v,v,v,v")
(ior:VI
(not:VI
- (match_operand:VI 1 "bcst_vector_operand" "v,Br,v,m"))
- (match_operand:VI 2 "bcst_vector_operand" "vBr,v,m,v")))]
+ (match_operand:VI 1 "bcst_vector_operand" "0,m, 0,vBr"))
+ (match_operand:VI 2 "bcst_vector_operand" "m,0,vBr, 0")))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
@@ -17710,7 +17825,7 @@ (define_insn "*iornot<mode>3"
(const_string "<sseinsnmode>")
(const_string "XI")))
(set (attr "enabled")
- (if_then_else (eq_attr "alternative" "2,3")
+ (if_then_else (eq_attr "alternative" "0,1")
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
(const_string "*")))])
@@ -17718,8 +17833,8 @@ (define_insn "*xnor<mode>3"
[(set (match_operand:VI 0 "register_operand" "=v,v")
(not:VI
(xor:VI
- (match_operand:VI 1 "bcst_vector_operand" "%v,v")
- (match_operand:VI 2 "bcst_vector_operand" "vBr,m"))))]
+ (match_operand:VI 1 "bcst_vector_operand" "%0, 0")
+ (match_operand:VI 2 "bcst_vector_operand" " m,vBr"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
@@ -17738,7 +17853,7 @@ (define_insn "*xnor<mode>3"
(const_string "<sseinsnmode>")
(const_string "XI")))
(set (attr "enabled")
- (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (eq_attr "alternative" "0")
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
(const_string "*")))])
@@ -17749,8 +17864,8 @@ (define_code_attr ternlog_nlogic [(and "0x11") (ior "0x77")])
(define_insn "*<nlogic><mode>3"
[(set (match_operand:VI 0 "register_operand" "=v,v")
(andor:VI
- (not:VI (match_operand:VI 1 "bcst_vector_operand" "%v,v"))
- (not:VI (match_operand:VI 2 "bcst_vector_operand" "vBr,m"))))]
+ (not:VI (match_operand:VI 1 "bcst_vector_operand" "%0, 0"))
+ (not:VI (match_operand:VI 2 "bcst_vector_operand" "m,vBr"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
@@ -17769,7 +17884,7 @@ (define_insn "*<nlogic><mode>3"
(const_string "<sseinsnmode>")
(const_string "XI")))
(set (attr "enabled")
- (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (eq_attr "alternative" "0")
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
(const_string "*")))])
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -Os" } */
typedef int v16si __attribute__ ((vector_size (64)));
typedef long long v8di __attribute__((vector_size (64)));
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2 -ftree-vectorize -mno-avx512dq -dp -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times {cvtmask2.*_pternlog} "1" } } */
+/* { dg-final { scan-assembler-times {constm1_pternlog} "1" } } */
+/* { dg-final { scan-assembler-not {(?n)vpternlogd.*\(} } } */
+
+
+#include <immintrin.h>
+
+__m512i g(void)
+{
+ return (__m512i){ 0 } - 1;
+}
+
+__m512i g1(__m512i* a)
+{
+ return ~(*a);
+}
+
+void
+foo (int* a, int* __restrict b)
+{
+ for (int i = 0; i != 16; i++)
+ {
+ if (b[i])
+ a[i] = -1;
+ else
+ a[i] = 0;
+ }
+}