@@ -15126,6 +15126,54 @@
[(set_attr "type" "ishiftx")
(set_attr "mode" "<MODE>")])
+;; Left shift for the SWI48 modes, written as a single set with no flags
+;; clobber.  It is only matched when TARGET_APX_NF is set and
+;; ix86_binary_operator_ok accepts the operands.
+;; What the output code prints, by insn type:
+;;   lea, ishiftx, msklog: "#", i.e. no assembly is printed here (GCC
+;;     treats a "#" template as an insn that must be split).
+;;   alu: "{nf} add %0, %0", after asserting that the shift count is
+;;     const1_rtx and that operands 0 and 1 are the same rtx.
+;;   anything else: "{nf} sal", in three-operand form when the isa
+;;     attribute of the insn is apx_ndd, otherwise in two-operand form.
+(define_insn "*ashl<mode>3_1_nf"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
+ (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
+ (match_operand:QI 2 "nonmemory_operand"
+"c<S>,M,r,<KS>,c<S>")))]
+ "TARGET_APX_NF &&
+ ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands, TARGET_APX_NDD)"
+{
+ bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ case TYPE_ISHIFTX:
+ case TYPE_MSKLOG:
+ return "#";
+
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ return "%{nf%} add{<imodesuffix>}\t%0, %0";
+
+ default:
+ return use_ndd ? "%{nf%} sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "%{nf%} sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ ;; One isa entry per constraint alternative (0 to 4).
+ [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
+ ;; The tests below are tried in order: alternatives 1, 2 and 4 get
+ ;; their type before the "alu" test is reached, while alternative 3
+ ;; is only checked after it.
+ (set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (eq_attr "alternative" "2")
+ (const_string "ishiftx")
+ (eq_attr "alternative" "4")
+ (const_string "ishift")
+ (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+ (match_operand 0 "register_operand"))
+ (match_operand 2 "const1_operand"))
+ (const_string "alu")
+ (eq_attr "alternative" "3")
+ (const_string "msklog")
+ ]
+ (const_string "ishift")))
+ ;; Type "alu" is printed as add %0, %0, so its immediate length is 0.
+ (set (attr "length_immediate")
+ (if_then_else
+ (eq_attr "type" "alu")
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*ashl<mode>3_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm") @@ -15187,6 +15235,17 @@
(set_attr "mode" "<MODE>")])
;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; The NF/NDD forms do not support the shift count as r; they only
+;; support c<S>.  The pattern split here has no flags clobber.
+;; Applies only when TARGET_BMI2 is set and reload has completed.  The
+;; matched insn is a plain set (no flags clobber) whose shift count is a
+;; QImode register.  The replacement has the same shape, except that
+;; operand 2 is replaced by its <MODE>mode lowpart.
+(define_split
+ [(set (match_operand:SWI48 0 "register_operand")
+ (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+ (match_operand:QI 2 "register_operand")))]
+ "TARGET_BMI2 && reload_completed"
+ [(set (match_dup 0)
+ (ashift:SWI48 (match_dup 1) (match_dup 2)))]
+ "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
(define_split
[(set (match_operand:SWI48 0 "register_operand")
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand") @@ -15273,6 +15332,50 @@
(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
"operands[2] = gen_lowpart (SImode, operands[2]);")
+;; HImode left shift written as a single set with no flags clobber.  It
+;; is only matched when TARGET_APX_NF is set and ix86_binary_operator_ok
+;; accepts the operands.
+;; What the output code prints, by insn type:
+;;   lea, msklog: "#", i.e. no assembly is printed here (GCC treats a
+;;     "#" template as an insn that must be split).
+;;   alu: "{nf} addw %0, %0", after asserting that the shift count is
+;;     const1_rtx.
+;;   anything else: "{nf} salw", in three-operand form when the isa
+;;     attribute of the insn is apx_ndd, otherwise in two-operand form.
+(define_insn "*ashlhi3_1_nf"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))]
+ "TARGET_APX_NF &&
+ ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)"
+{
+ bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ case TYPE_MSKLOG:
+ return "#";
+
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "%{nf%} add{w}\t%0, %0";
+
+ default:
+ return use_ndd ? "%{nf%} sal{w}\t{%2, %1, %0|%0, %1, %2}"
+ : "%{nf%} sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ ;; One isa entry per constraint alternative (0 to 3).
+ [(set_attr "isa" "*,*,avx512f,apx_ndd")
+ ;; The tests below are tried in order, so alternatives 1, 2 and 3 get
+ ;; their type before the "alu" test is reached.
+ (set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (eq_attr "alternative" "2")
+ (const_string "msklog")
+ (eq_attr "alternative" "3")
+ (const_string "ishift")
+ (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+ (match_operand 0 "register_operand"))
+ (match_operand 2 "const1_operand"))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ ;; Type "alu" is printed as add %0, %0, so its immediate length is 0.
+ (set (attr "length_immediate")
+ (if_then_else
+ (eq_attr "type" "alu")
+ (const_string "0")
+ (const_string "*")))
+ ;; Alternative 1 (type lea) is the only one whose mode is SI.
+ (set_attr "mode" "HI,SI,HI,HI")])
+
(define_insn "*ashlhi3_1"
[(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm") @@ -15326,6 +15429,61 @@
(const_string "*")))
(set_attr "mode" "HI,SI,HI,HI")])
+;; QImode left shift written as a single set with no flags clobber.  It
+;; is only matched when TARGET_APX_NF is set and ix86_binary_operator_ok
+;; accepts the operands.
+;; What the output code prints, by insn type:
+;;   lea, msklog: "#", i.e. no assembly is printed here (GCC treats a
+;;     "#" template as an insn that must be split).
+;;   alu: asserts that the shift count is const1_rtx, then prints
+;;     "{nf} addl %k0, %k0" when operand 1 is a register for which
+;;     ANY_QI_REGNO_P is false, otherwise "{nf} addb %0, %0".
+;;   anything else: "{nf} sall" on %k0 when the mode attribute of the
+;;     insn is SI; otherwise "{nf} salb", in three-operand form when the
+;;     isa attribute is apx_ndd and in two-operand form when it is not.
+(define_insn "*ashlqi3_1_nf"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))]
+ "TARGET_APX_NF &&
+ ix86_binary_operator_ok (ASHIFT, QImode, operands, TARGET_APX_NDD)"
+{
+ bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ case TYPE_MSKLOG:
+ return "#";
+
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ if (REG_P (operands[1]) && !ANY_QI_REGNO_P (REGNO (operands[1])))
+ return "%{nf%} add{l}\t%k0, %k0";
+ else
+ return "%{nf%} add{b}\t%0, %0";
+
+ default:
+ if (get_attr_mode (insn) == MODE_SI)
+ return "%{nf%} sal{l}\t{%2, %k0|%k0, %2}";
+ else
+ return use_ndd ? "%{nf%} sal{b}\t{%2, %1, %0|%0, %1, %2}"
+ : "%{nf%} sal{b}\t{%2, %0|%0, %2}";
+ }
+}
+ ;; One isa entry per constraint alternative (0 to 4).
+ [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
+ ;; The tests below are tried in order, so alternatives 2, 3 and 4 get
+ ;; their type before the "alu" test is reached.
+ (set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "lea")
+ (eq_attr "alternative" "3")
+ (const_string "msklog")
+ (eq_attr "alternative" "4")
+ (const_string "ishift")
+ (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+ (match_operand 0 "register_operand"))
+ (match_operand 2 "const1_operand"))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ ;; Type "alu" is printed as add of a register to itself, so its
+ ;; immediate length is 0.
+ (set (attr "length_immediate")
+ (if_then_else
+ (eq_attr "type" "alu")
+ (const_string "0")
+ (const_string "*")))
+ ;; Alternatives 1 and 2 are the ones whose mode is SI.
+ (set_attr "mode" "QI,SI,SI,QI,QI")
+ ;; Potential partial reg stall on alternative 1.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "1")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
+
(define_insn "*ashlqi3_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm") @@ -15448,6 +15606,23 @@
(set_attr "mode" "<MODE>")])
;; Convert ashift to the lea pattern to avoid flags dependency.
+;; Applies after reload to a plain set (no flags clobber) that shifts an
+;; index register left by a constant accepted by const_0_to_3_operand,
+;; when destination and source are different hard registers.  The
+;; replacement is a multiplication in <LEAMODE> by 1 << count; when
+;; <MODE> differs from <LEAMODE>, operands 0 and 1 are first replaced by
+;; their <LEAMODE> lowparts.
+(define_split
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (ashift:SWI (match_operand:SWI 1 "index_reg_operand")
+ (match_operand 2 "const_0_to_3_operand")))]
+ "reload_completed
+ && REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_dup 0)
+ (mult:<LEAMODE> (match_dup 1) (match_dup 2)))] {
+ if (<MODE>mode != <LEAMODE>mode)
+ {
+ operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+ operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+ }
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));
+})
+
(define_split
[(set (match_operand:SWI 0 "general_reg_operand")
(ashift:SWI (match_operand:SWI 1 "index_reg_operand") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 72d4556f47d..498ca5e4d1b 100644
@@ -2169,6 +2169,19 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
+;; Applies when TARGET_AVX512F is set and reload has completed.  The
+;; matched insn is a plain set (no flags clobber) that shifts one mask
+;; register by a constant into another mask register, for each shift
+;; code in any_lshift.  The replacement keeps the same set and puts it
+;; in a parallel with (unspec [(const_int 0)] UNSPEC_MASKOP).
+(define_split
+ [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+ (any_lshift:SWI1248_AVX512BW
+ (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+ (match_operand 2 "const_int_operand")))]
+ "TARGET_AVX512F && reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (any_lshift:SWI1248_AVX512BW
+ (match_dup 1)
+ (match_dup 2)))
+ (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
(define_split
[(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
(any_lshift:SWI1248_AVX512BW