@@ -19,7 +19,7 @@
;;; Unused letters:
;;; B H T
-;;; h jk
+;;; h j
;; Integer register constraints.
;; It is not necessary to define 'r' here.
@@ -78,6 +78,12 @@
"TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_SECOND_REG : NO_REGS"
"Second from top of 80387 floating-point stack (@code{%st(1)}).")
+(define_register_constraint "k" "TARGET_AVX512F ? MASK_EVEX_REGS : NO_REGS"
+"@internal Any mask register that can be used as predicate, i.e. k1-k7.")
+
+(define_register_constraint "Yk" "TARGET_AVX512F ? MASK_REGS : NO_REGS"
+"@internal Any mask register.")
+
;; Vector registers (also used for plain floating point nowadays).
(define_register_constraint "y" "TARGET_MMX ? MMX_REGS : NO_REGS"
"Any MMX register.")
@@ -2193,6 +2193,9 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
+ /* Mask registers. */
+ MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
+ MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
};
/* The "default" register map used in 32bit mode. */
@@ -2208,6 +2211,7 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 16-23*/
-1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 24-31*/
+ 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
};
/* The "default" register map used in 64bit mode. */
@@ -2223,6 +2227,7 @@ int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
67, 68, 69, 70, 71, 72, 73, 74, /* new SSE registers 16-23 */
75, 76, 77, 78, 79, 80, 81, 82, /* new SSE registers 24-31 */
+ 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
@@ -2290,6 +2295,7 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 16-23*/
-1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 24-31*/
+ 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
};
/* Define parameter passing and return registers. */
@@ -4137,7 +4143,8 @@ ix86_conditional_register_usage (void)
/* If AVX512F is disabled, squash the registers. */
if (! TARGET_AVX512F)
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
- if (TEST_HARD_REG_BIT (reg_class_contents[(int)EVEX_SSE_REGS], i))
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)MASK_REGS], i)
+ || TEST_HARD_REG_BIT (reg_class_contents[(int)EVEX_SSE_REGS], i))
fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
@@ -33790,10 +33797,12 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass)
return regclass;
/* Force constants into memory if we are loading a (nonzero) constant into
- an MMX or SSE register. This is because there are no MMX/SSE instructions
- to load from a constant. */
+ an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
+ instructions to load from a constant. */
if (CONSTANT_P (x)
- && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
+ && (MAYBE_MMX_CLASS_P (regclass)
+ || MAYBE_SSE_CLASS_P (regclass)
+ || MAYBE_MASK_CLASS_P (regclass)))
return NO_REGS;
/* Prefer SSE regs only, if we can use them for math. */
@@ -34326,6 +34335,8 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
return false;
if (STACK_REGNO_P (regno))
return VALID_FP_MODE_P (mode);
+ if (MASK_REGNO_P (regno))
+ return VALID_MASK_REG_MODE (mode);
if (SSE_REGNO_P (regno))
{
/* We implement the move patterns for all vector modes into and
@@ -35126,6 +35137,10 @@ x86_order_regs_for_local_alloc (void)
for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
reg_alloc_order [pos++] = i;
+  /* Mask registers.  */
+ for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
/* x87 registers. */
if (TARGET_SSE_MATH)
for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
@@ -963,7 +963,7 @@ enum target_cpu_default
eliminated during reloading in favor of either the stack or frame
pointer. */
-#define FIRST_PSEUDO_REGISTER 69
+#define FIRST_PSEUDO_REGISTER 77
/* Number of hardware registers that go into the DWARF-2 unwind info.
If not defined, equals FIRST_PSEUDO_REGISTER. */
@@ -993,7 +993,9 @@ enum target_cpu_default
/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \
0, 0, 0, 0, 0, 0, 0, 0, \
/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \
- 0, 0, 0, 0, 0, 0, 0, 0 }
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* k0, k1, k2, k3, k4, k5, k6, k7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0 }
/* 1 for registers not available across function calls.
These must include the FIXED_REGISTERS and also any
@@ -1025,7 +1027,9 @@ enum target_cpu_default
/*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/ \
6, 6, 6, 6, 6, 6, 6, 6, \
/*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/ \
- 6, 6, 6, 6, 6, 6, 6, 6 }
+ 6, 6, 6, 6, 6, 6, 6, 6, \
+ /* k0, k1, k2, k3, k4, k5, k6, k7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1 }
/* Order in which to allocate registers. Each register must be
listed once, even those in FIXED_REGISTERS. List frame pointer
@@ -1041,7 +1045,7 @@ enum target_cpu_default
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, \
- 63, 64, 65, 66, 67, 68 }
+ 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76 }
/* ADJUST_REG_ALLOC_ORDER is a macro which permits reg_alloc_order
to be rearranged based on a particular function. When using sse math,
@@ -1138,6 +1142,8 @@ enum target_cpu_default
|| (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \
|| (MODE) == V16SFmode)
+#define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
+
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
#define HARD_REGNO_MODE_OK(REGNO, MODE) \
@@ -1211,6 +1217,9 @@ enum target_cpu_default
#define FIRST_EXT_REX_SSE_REG (LAST_REX_SSE_REG + 1) /*53*/
#define LAST_EXT_REX_SSE_REG (FIRST_EXT_REX_SSE_REG + 15) /*68*/
+#define FIRST_MASK_REG (LAST_EXT_REX_SSE_REG + 1) /*69*/
+#define LAST_MASK_REG (FIRST_MASK_REG + 7) /*76*/
+
/* Override this in other tm.h files to cope with various OS lossage
requiring a frame pointer. */
#ifndef SUBTARGET_FRAME_POINTER_REQUIRED
@@ -1299,6 +1308,8 @@ enum reg_class
FLOAT_INT_REGS,
INT_SSE_REGS,
FLOAT_INT_SSE_REGS,
+ MASK_EVEX_REGS,
+ MASK_REGS,
ALL_REGS, LIM_REG_CLASSES
};
@@ -1320,6 +1331,8 @@ enum reg_class
reg_classes_intersect_p (ALL_SSE_REGS, (CLASS))
#define MAYBE_MMX_CLASS_P(CLASS) \
reg_classes_intersect_p (MMX_REGS, (CLASS))
+#define MAYBE_MASK_CLASS_P(CLASS) \
+ reg_classes_intersect_p (MASK_REGS, (CLASS))
#define Q_CLASS_P(CLASS) \
reg_class_subset_p ((CLASS), Q_REGS)
@@ -1349,6 +1362,8 @@ enum reg_class
"FLOAT_INT_REGS", \
"INT_SSE_REGS", \
"FLOAT_INT_SSE_REGS", \
+ "MASK_EVEX_REGS", \
+ "MASK_REGS", \
"ALL_REGS" }
/* Define which registers fit in which classes. This is an initializer
@@ -1386,7 +1401,9 @@ enum reg_class
{ 0x11ffff, 0x1fe0, 0x0 }, /* FLOAT_INT_REGS */ \
{ 0x1ff100ff,0xffffffe0, 0x1f }, /* INT_SSE_REGS */ \
{ 0x1ff1ffff,0xffffffe0, 0x1f }, /* FLOAT_INT_SSE_REGS */ \
-{ 0xffffffff,0xffffffff, 0x1f } \
+ { 0x0, 0x0,0x1fc0 }, /* MASK_EVEX_REGS */ \
+ { 0x0, 0x0,0x1fe0 }, /* MASK_REGS */ \
+{ 0xffffffff,0xffffffff,0x1fff } \
}
/* The same information, inverted:
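For reference (not part of the patch): the three initializer words are 32-bit HARD_REG_SET words, and with FIRST_MASK_REG = 69 (defined above) the eight mask registers land in bits 5-12 of the third word.  That gives 0x1fe0 for MASK_REGS (k0-k7) and 0x1fc0 for MASK_EVEX_REGS (k1-k7; k0 is excluded because it cannot be used as a write-mask predicate).  A minimal C check of that arithmetic:

/* Illustrative only: recompute the two new REG_CLASS_CONTENTS words
   from the hard-register numbering used by the patch.  */
#include <assert.h>

#define FIRST_MASK_REG 69   /* k0 */
#define LAST_MASK_REG  76   /* k7 */

int
main (void)
{
  unsigned int mask_regs = 0, mask_evex_regs = 0;
  int i;

  for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
    {
      mask_regs |= 1u << (i - 64);          /* third 32-bit word: k0-k7 */
      if (i > FIRST_MASK_REG)
        mask_evex_regs |= 1u << (i - 64);   /* k1-k7 only */
    }

  assert (mask_regs == 0x1fe0);
  assert (mask_evex_regs == 0x1fc0);
  return 0;
}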
@@ -1444,6 +1461,8 @@ enum reg_class
: (N) <= LAST_REX_SSE_REG ? (FIRST_REX_SSE_REG + (N) - 8) \
: (FIRST_EXT_REX_SSE_REG + (N) - 16))
+#define MASK_REGNO_P(N) IN_RANGE ((N), FIRST_MASK_REG, LAST_MASK_REG)
+#define ANY_MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
#define SSE_FLOAT_MODE_P(MODE) \
((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
@@ -1496,10 +1515,10 @@ enum reg_class
/* Get_secondary_mem widens integral modes to BITS_PER_WORD.
There is no need to emit full 64 bit move on 64 bit targets
- for integral modes that can be moved using 32 bit move. */
+ for integral modes that can be moved using 8 bit move. */
#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \
- (GET_MODE_BITSIZE (MODE) < 32 && INTEGRAL_MODE_P (MODE) \
- ? mode_for_size (32, GET_MODE_CLASS (MODE), 0) \
+ (GET_MODE_BITSIZE (MODE) < 8 && INTEGRAL_MODE_P (MODE) \
+ ? mode_for_size (8, GET_MODE_CLASS (MODE), 0) \
: MODE)
/* Return a class of registers that cannot change FROM mode to TO mode. */
@@ -2000,7 +2019,8 @@ do { \
"xmm16", "xmm17", "xmm18", "xmm19", \
"xmm20", "xmm21", "xmm22", "xmm23", \
"xmm24", "xmm25", "xmm26", "xmm27", \
- "xmm28", "xmm29", "xmm30", "xmm31" }
+ "xmm28", "xmm29", "xmm30", "xmm31", \
+ "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" }
#define REGISTER_NAMES HI_REGISTER_NAMES
@@ -178,6 +178,12 @@
;; For BMI2 support
UNSPEC_PDEP
UNSPEC_PEXT
+
+ ;; For AVX512F mask support
+ UNSPEC_KIOR
+ UNSPEC_KXOR
+ UNSPEC_KAND
+ UNSPEC_KANDN
])
(define_c_enum "unspecv" [
@@ -328,6 +334,14 @@
(XMM29_REG 66)
(XMM30_REG 67)
(XMM31_REG 68)
+ (MASK0_REG 69)
+ (MASK1_REG 70)
+ (MASK2_REG 71)
+ (MASK3_REG 72)
+ (MASK4_REG 73)
+ (MASK5_REG 74)
+ (MASK6_REG 75)
+ (MASK7_REG 76)
])
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
@@ -360,7 +374,7 @@
sseishft,sseishft1,ssecmp,ssecomi,
ssecvt,ssecvt1,sseicvt,sseins,
sseshuf,sseshuf1,ssemuladd,sse4arg,
- lwp,
+ lwp,mskmov,msklog,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
@@ -379,7 +393,7 @@
ssemul,sseimul,ssediv,sselog,sselog1,
sseishft,sseishft1,ssecmp,ssecomi,
ssecvt,ssecvt1,sseicvt,sseins,
- sseshuf,sseshuf1,ssemuladd,sse4arg")
+ sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_string "mmx")
@@ -390,7 +404,7 @@
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
(cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
- bitmanip,imulx")
+ bitmanip,imulx,msklog,mskmov")
(const_int 0)
(eq_attr "unit" "i387,sse,mmx")
(const_int 0)
@@ -451,7 +465,7 @@
;; Set when 0f opcode prefix is used.
(define_attr "prefix_0f" ""
(if_then_else
- (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
+ (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov")
(eq_attr "unit" "sse,mmx"))
(const_int 1)
(const_int 0)))
@@ -651,7 +665,7 @@
fmov,fcmp,fsgn,
sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,
sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1,
- mmx,mmxmov,mmxcmp,mmxcvt")
+ mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
(match_operand 2 "memory_operand"))
(const_string "load")
(and (eq_attr "type" "icmov,ssemuladd,sse4arg")
@@ -2211,8 +2225,8 @@
(const_string "SI")))])
(define_insn "*movhi_internal"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m")
- (match_operand:HI 1 "general_operand" "r ,rn,rm,rn"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,Yk,Yk,rm")
+ (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,rm,Yk,Yk"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2221,6 +2235,16 @@
/* movzwl is faster than movw on p2 due to partial word stalls,
though not as fast as an aligned movl. */
return "movz{wl|x}\t{%1, %k0|%k0, %1}";
+
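+      /* Mask moves (alternatives 4-6): load a mask from a GPR or memory,
+         copy between mask registers, and store a mask back to a GPR or
+         memory.  AVX512F provides only the word-sized kmovw.  */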
+ case TYPE_MSKMOV:
+ switch (which_alternative)
+ {
+ case 4: return "kmovw\t{%k1, %0|%0, %k1}";
+ case 5: return "kmovw\t{%1, %0|%0, %1}";
+ case 6: return "kmovw\t{%1, %k0|%k0, %1}";
+ default: gcc_unreachable ();
+ }
+
default:
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%k1, %k0|%k0, %k1}";
@@ -2238,11 +2262,17 @@
(and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand"))
(const_string "imov")
+ (eq_attr "alternative" "4,5,6")
+ (const_string "mskmov")
(and (match_test "TARGET_MOVX")
(eq_attr "alternative" "0,2"))
(const_string "imovx")
]
(const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "4,5,6")
+ (const_string "vex")
+ (const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "type" "imovx")
(const_string "SI")
@@ -2267,8 +2297,8 @@
;; register stall machines with, where we use QImode instructions, since
;; partial register stall can be caused there. Then we use movzx.
(define_insn "*movqi_internal"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
- (match_operand:QI 1 "general_operand" "q ,qn,qm,q,rn,qm,qn"))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m ,Yk,Yk,r")
+ (match_operand:QI 1 "general_operand" "q ,qn,qm,q,rn,qm,qn,rm,Yk,Yk"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2276,6 +2306,16 @@
case TYPE_IMOVX:
gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
return "movz{bl|x}\t{%1, %k0|%k0, %1}";
+
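+      /* Mask moves (alternatives 7-9).  AVX512F has no byte-sized kmov,
+         so QImode mask moves use kmovw as well.  */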
+ case TYPE_MSKMOV:
+ switch (which_alternative)
+ {
+ case 7: return "kmovw\t{%k1, %0|%0, %k1}";
+ case 8: return "kmovw\t{%1, %0|%0, %1}";
+ case 9: return "kmovw\t{%1, %k0|%k0, %1}";
+ default: gcc_unreachable ();
+ }
+
default:
if (get_attr_mode (insn) == MODE_SI)
return "mov{l}\t{%k1, %k0|%k0, %k1}";
@@ -2295,11 +2335,17 @@
(const_string "imov")
(eq_attr "alternative" "3,5")
(const_string "imovx")
+ (eq_attr "alternative" "7,8,9")
+ (const_string "mskmov")
(and (match_test "TARGET_MOVX")
(eq_attr "alternative" "2"))
(const_string "imovx")
]
(const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "7,8,9")
+ (const_string "vex")
+ (const_string "orig")))
(set (attr "mode")
(cond [(eq_attr "alternative" "3,4,5")
(const_string "SI")
@@ -7614,6 +7660,18 @@
(const_string "*")))
(set_attr "mode" "HI,HI,SI")])
+(define_insn "kandn<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=Yk")
+ (unspec:SWI12
+ [(match_operand:SWI12 1 "register_operand" "Yk")
+ (match_operand:SWI12 2 "register_operand" "Yk")]
+ UNSPEC_KANDN))]
+ "TARGET_AVX512F"
+ "kandnw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "<MODE>")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*andqi_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
@@ -7639,6 +7697,18 @@
[(set_attr "type" "alu1")
(set_attr "mode" "QI")])
+(define_insn "kand<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=Yk")
+ (unspec:SWI12
+ [(match_operand:SWI12 1 "register_operand" "Yk")
+ (match_operand:SWI12 2 "register_operand" "Yk")]
+ UNSPEC_KAND))]
+ "TARGET_AVX512F"
+ "kandw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "msklog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
;; Turn *anddi_1 into *andsi_1_zext if possible.
(define_split
[(set (match_operand:DI 0 "register_operand")
@@ -8042,6 +8112,81 @@
[(set_attr "type" "alu")
(set_attr "mode" "<MODE>")])
+(define_insn "kior<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=Yk")
+ (unspec:SWI12
+ [(match_operand:SWI12 1 "register_operand" "Yk")
+ (match_operand:SWI12 2 "register_operand" "Yk")]
+ UNSPEC_KIOR))]
+ "TARGET_AVX512F"
+ "korw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "msklog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
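+;; AVX512F mask exclusive or: operand 0 = operand 1 ^ operand 2 (kxorw).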
+(define_insn "kxor<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=Yk")
+ (unspec:SWI12
+ [(match_operand:SWI12 1 "register_operand" "Yk")
+ (match_operand:SWI12 2 "register_operand" "Yk")]
+ UNSPEC_KXOR))]
+ "TARGET_AVX512F"
+ "kxorw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "msklog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "kxnor<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "=Yk")
+ (not:SWI12
+ (xor:SWI12
+ (match_operand:SWI12 1 "register_operand" "Yk")
+ (match_operand:SWI12 2 "register_operand" "Yk"))))]
+ "TARGET_AVX512F"
+ "kxnorw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "<MODE>")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+(define_insn "kortestzhi"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (ior:HI
+ (match_operand:HI 0 "register_operand" "%Yk")
+ (match_operand:HI 1 "register_operand" "Yk"))
+ (const_int 0)))]
+ "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
+ "kortestw\t{%1, %0|%0, %1}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+(define_insn "kortestchi"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (ior:HI
+ (match_operand:HI 0 "register_operand" "%Yk")
+ (match_operand:HI 1 "register_operand" "Yk"))
+ (const_int -1)))]
+ "TARGET_AVX512F && ix86_match_ccmode (insn, CCCmode)"
+ "kortestw\t{%1, %0|%0, %1}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+(define_insn "kunpckhi"
+ [(set (match_operand:HI 0 "register_operand" "=Yk")
+ (ior:HI
+ (ashift:HI
+ (match_operand:HI 1 "register_operand" "Yk")
+ (const_int 8))
+ (zero_extend:HI (subreg:QI (match_operand:HI 2 "register_operand" "Yk") 0))))]
+ "TARGET_AVX512F"
+ "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; ??? Special case for immediate operand is missing - it is tricky.
(define_insn "*<code>si_2_zext"
@@ -8611,23 +8756,38 @@
"ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
(define_insn "*one_cmpl<mode>2_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
- (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))]
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+ (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))]
"ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
"not{<imodesuffix>}\t%0"
[(set_attr "type" "negnot")
(set_attr "mode" "<MODE>")])
+(define_insn "*one_cmplhi2_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yk")
+ (not:HI (match_operand:HI 1 "nonimmediate_operand" "0,Yk")))]
+ "ix86_unary_operator_ok (NOT, HImode, operands)"
+ "@
+ not{w}\t%0
+ knotw\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,avx512f")
+ (set_attr "type" "negnot,msklog")
+ (set_attr "prefix" "*,vex")
+ (set_attr "mode" "HI")])
+
;; %%% Potential partial reg stall on alternative 1. What to do?
(define_insn "*one_cmplqi2_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
- (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yk")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,Yk")))]
"ix86_unary_operator_ok (NOT, QImode, operands)"
"@
not{b}\t%0
- not{l}\t%k0"
- [(set_attr "type" "negnot")
- (set_attr "mode" "QI,SI")])
+ not{l}\t%k0
+ knotw\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,*,avx512f")
+ (set_attr "type" "negnot,negnot,msklog")
+ (set_attr "prefix" "*,*,vex")
+ (set_attr "mode" "QI,SI,QI")])
;; ??? Currently never generated - xor is used instead.
(define_insn "*one_cmplsi2_1_zext"
@@ -16351,7 +16511,7 @@
})
;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
-
+;; Do not split instructions with mask registers.
(define_split
[(set (match_operand 0 "register_operand")
(match_operator 3 "promotable_binary_operator"
@@ -16365,7 +16525,10 @@
|| !CONST_INT_P (operands[2])
|| satisfies_constraint_K (operands[2])))
|| (GET_MODE (operands[0]) == QImode
- && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
+ && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))
+ && (! ANY_MASK_REG_P (operands[0])
+ || ! ANY_MASK_REG_P (operands[1])
+ || ! ANY_MASK_REG_P (operands[2]))"
[(parallel [(set (match_dup 0)
(match_op_dup 3 [(match_dup 1) (match_dup 2)]))
(clobber (reg:CC FLAGS_REG))])]
@@ -16450,6 +16613,7 @@
operands[1] = gen_lowpart (SImode, operands[1]);
})
+;; Do not split instructions with mask registers.
(define_split
[(set (match_operand 0 "register_operand")
(not (match_operand 1 "register_operand")))]
@@ -16457,7 +16621,9 @@
&& (GET_MODE (operands[0]) == HImode
|| (GET_MODE (operands[0]) == QImode
&& (TARGET_PROMOTE_QImode
- || optimize_insn_for_size_p ())))"
+ || optimize_insn_for_size_p ())))
+ && (! ANY_MASK_REG_P (operands[0])
+ || ! ANY_MASK_REG_P (operands[1]))"
[(set (match_dup 0)
(not:SI (match_dup 1)))]
{