@@ -919,8 +919,7 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
rtx
scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
{
- rtx tmp = gen_reg_rtx (vmode);
- rtx src;
+ rtx src, tmp;
/* Comparison against anything other than zero, requires an XOR. */
if (op2 != const0_rtx)
{
@@ -929,6 +928,7 @@ scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
/* If both operands are MEMs, explicitly load the OP1 into TMP. */
if (MEM_P (op1) && MEM_P (op2))
{
+ tmp = gen_reg_rtx (vmode);
emit_insn_before (gen_rtx_SET (tmp, op1), insn);
src = tmp;
}
@@ -943,34 +943,56 @@ scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
rtx op12 = XEXP (op1, 1);
convert_op (&op11, insn);
convert_op (&op12, insn);
- if (MEM_P (op11))
+ if (!REG_P (op11))
{
+ tmp = gen_reg_rtx (vmode);
emit_insn_before (gen_rtx_SET (tmp, op11), insn);
op11 = tmp;
}
src = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, op11), op12);
}
+ else if (GET_CODE (op1) == AND)
+ {
+ rtx op11 = XEXP (op1, 0);
+ rtx op12 = XEXP (op1, 1);
+ convert_op (&op11, insn);
+ convert_op (&op12, insn);
+ if (!REG_P (op11))
+ {
+ tmp = gen_reg_rtx (vmode);
+ emit_insn_before (gen_rtx_SET (tmp, op11), insn);
+ op11 = tmp;
+ }
+ return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, op11, op12),
+ UNSPEC_PTEST);
+ }
else
{
convert_op (&op1, insn);
src = op1;
}
- emit_insn_before (gen_rtx_SET (tmp, src), insn);
+
+ if (!REG_P (src))
+ {
+ tmp = gen_reg_rtx (vmode);
+ emit_insn_before (gen_rtx_SET (tmp, src), insn);
+ src = tmp;
+ }
if (vmode == V2DImode)
- emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (tmp),
- copy_rtx_if_shared (tmp),
- copy_rtx_if_shared (tmp)),
- insn);
+ {
+ tmp = gen_reg_rtx (vmode);
+ emit_insn_before (gen_vec_interleave_lowv2di (tmp, src, src), insn);
+ src = tmp;
+ }
else if (vmode == V4SImode)
- emit_insn_before (gen_sse2_pshufd (copy_rtx_if_shared (tmp),
- copy_rtx_if_shared (tmp),
- const0_rtx),
- insn);
-
- return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (tmp),
- copy_rtx_if_shared (tmp)),
- UNSPEC_PTEST);
+ {
+ tmp = gen_reg_rtx (vmode);
+ emit_insn_before (gen_sse2_pshufd (tmp, src, const0_rtx), insn);
+ src = tmp;
+ }
+
+ return gen_rtx_UNSPEC (CCmode, gen_rtvec (2, src, src), UNSPEC_PTEST);
}
/* Helper function for converting INSN to vector mode. */
@@ -1289,6 +1311,9 @@ timode_scalar_chain::fix_debug_reg_uses (rtx reg)
void
timode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
+ if (GET_MODE (*op) == V1TImode)
+ return;
+
*op = copy_rtx_if_shared (*op);
if (REG_P (*op))
@@ -1296,19 +1321,19 @@ timode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
else if (MEM_P (*op))
{
rtx tmp = gen_reg_rtx (V1TImode);
- emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (V1TImode, tmp, 0),
+ emit_insn_before (gen_rtx_SET (tmp,
gen_gpr_to_xmm_move_src (V1TImode, *op)),
insn);
- *op = gen_rtx_SUBREG (V1TImode, tmp, 0);
+ *op = tmp;
if (dump_file)
fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
INSN_UID (insn), REGNO (tmp));
}
- else if (CONST_INT_P (*op))
+ else if (CONST_SCALAR_INT_P (*op))
{
rtx vec_cst;
- rtx tmp = gen_rtx_SUBREG (V1TImode, gen_reg_rtx (TImode), 0);
+ rtx tmp = gen_reg_rtx (V1TImode);
/* Prefer all ones vector in case of -1. */
if (constm1_operand (*op, TImode))
@@ -1329,7 +1354,7 @@ timode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
emit_insn_before (seq, insn);
}
- emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
+ emit_insn_before (gen_move_insn (tmp, vec_cst), insn);
*op = tmp;
}
else
@@ -1609,14 +1634,26 @@ convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
rtx op2 = XEXP (src, 1);
/* *cmp<dwi>_doubleword. */
- if ((CONST_INT_P (op1)
+ if ((CONST_SCALAR_INT_P (op1)
|| ((REG_P (op1) || MEM_P (op1))
&& GET_MODE (op1) == mode))
- && (CONST_INT_P (op2)
+ && (CONST_SCALAR_INT_P (op2)
|| ((REG_P (op2) || MEM_P (op2))
&& GET_MODE (op2) == mode)))
return true;
+  /* *testti_doubleword: (and:TI (reg) (op)) compared against zero.  */
+ if (op2 == const0_rtx
+ && GET_CODE (op1) == AND
+ && REG_P (XEXP (op1, 0)))
+ {
+ rtx op12 = XEXP (op1, 1);
+ return GET_MODE (XEXP (op1, 0)) == TImode
+ && (CONST_SCALAR_INT_P (op12)
+ || ((REG_P (op12) || MEM_P (op12))
+ && GET_MODE (op12) == TImode));
+ }
+
/* *test<dwi>_not_doubleword. */
if (op2 == const0_rtx
&& GET_CODE (op1) == AND
@@ -1803,15 +1840,21 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
if (!MEM_P (dst)
&& GET_CODE (XEXP (src, 0)) == NOT
&& REG_P (XEXP (XEXP (src, 0), 0))
- && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1))))
+ && (REG_P (XEXP (src, 1))
+ || CONST_SCALAR_INT_P (XEXP (src, 1))
+ || timode_mem_p (XEXP (src, 1))))
return true;
return REG_P (XEXP (src, 0))
- && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1)));
+ && (REG_P (XEXP (src, 1))
+ || CONST_SCALAR_INT_P (XEXP (src, 1))
+ || timode_mem_p (XEXP (src, 1)));
case IOR:
case XOR:
return REG_P (XEXP (src, 0))
- && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1)));
+ && (REG_P (XEXP (src, 1))
+ || CONST_SCALAR_INT_P (XEXP (src, 1))
+ || timode_mem_p (XEXP (src, 1)));
case NOT:
return REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0));
@@ -21063,11 +21063,25 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
case UNSPEC:
if (XINT (x, 1) == UNSPEC_TP)
*total = 0;
- else if (XINT(x, 1) == UNSPEC_VTERNLOG)
+ else if (XINT (x, 1) == UNSPEC_VTERNLOG)
{
*total = cost->sse_op;
return true;
}
+ else if (XINT (x, 1) == UNSPEC_PTEST)
+ {
+ *total = cost->sse_op;
+ if (XVECLEN (x, 0) == 2
+ && GET_CODE (XVECEXP (x, 0, 0)) == AND)
+ {
+ rtx andop = XVECEXP (x, 0, 0);
+ *total += rtx_cost (XEXP (andop, 0), GET_MODE (andop),
+ AND, opno, speed)
+ + rtx_cost (XEXP (andop, 1), GET_MODE (andop),
+ AND, opno, speed);
+ return true;
+ }
+ }
return false;
case VEC_SELECT:
@@ -9756,6 +9756,27 @@
[(set_attr "type" "test")
(set_attr "mode" "QI")])
+;; Provide a *testti instruction that the STV pass can implement using ptest.
+;; This pattern splits into *andti3_doubleword and *cmpti_doubleword.
+(define_insn_and_split "*testti_doubleword"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (and:TI (match_operand:TI 0 "register_operand")
+ (match_operand:TI 1 "general_operand"))
+ (const_int 0)))]
+ "TARGET_64BIT
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 2) (and:TI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 2) (const_int 0)))]
+{
+ operands[2] = gen_reg_rtx (TImode);
+ if (!x86_64_hilo_general_operand (operands[1], TImode))
+ operands[1] = force_reg (TImode, operands[1]);
+})
+
;; Combine likes to form bit extractions for some tests. Humor it.
(define_insn_and_split "*testqi_ext_3"
[(set (match_operand 0 "flags_reg_operand")
@@ -23021,6 +23021,19 @@
(set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "TI")])
+(define_insn_and_split "*ptest<mode>_and"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(and:V_AVX (match_operand:V_AVX 0 "register_operand")
+ (match_operand:V_AVX 1 "vector_operand"))
+ (and:V_AVX (match_dup 0) (match_dup 1))]
+ UNSPEC_PTEST))]
+ "TARGET_SSE4_1
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))])
+
(define_expand "nearbyint<mode>2"
[(set (match_operand:VFH 0 "register_operand")
(unspec:VFH
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -msse4.1 -mstv -mno-stackrealign" } */
+
+__int128 a,b;
+int foo()
+{
+ return (a & b) != 0;
+}
+
+/* { dg-final { scan-assembler-not "pand" } } */
+/* { dg-final { scan-assembler "ptest" } } */