@@ -2363,32 +2363,186 @@ rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
machine_mode cmp_mode = GET_MODE (x);
- machine_mode cc_mode;
rtx cc_reg;
if (cmp_mode == TImode)
{
- gcc_assert (code == NE);
-
- cc_mode = CCmode;
- cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
-
rtx x_lo = operand_subword (x, 0, 0, TImode);
- rtx y_lo = operand_subword (y, 0, 0, TImode);
- emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
-
rtx x_hi = operand_subword (x, 1, 0, TImode);
- rtx y_hi = operand_subword (y, 1, 0, TImode);
- emit_insn (gen_ccmpccdi (cc_reg, x_hi, y_hi,
- gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
- GEN_INT (aarch64_nzcv_codes[AARCH64_NE])));
+ rtx y_lo, y_hi, tmp;
+
+ if (y == const0_rtx)
+ {
+ y_lo = y_hi = y;
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ /* For equality, IOR the two halves together. If this gets
+ used for a branch, we expect this to fold to cbz/cbnz;
+ otherwise it's no larger than cmp+ccmp below. Beware of
+ the compare-and-swap post-reload split and use cmp+ccmp. */
+ if (!can_create_pseudo_p ())
+ break;
+ tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+ emit_insn (gen_cmpdi (tmp, const0_rtx));
+ cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+ goto done;
+
+ case LT:
+ case GE:
+ /* Check only the sign bit. Choose to expose this detail,
+ lest something later tries to use a COMPARE in a way
+ that doesn't correspond. This is "tst". */
+ cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+ tmp = gen_rtx_AND (DImode, x_hi, GEN_INT (INT64_MIN));
+ tmp = gen_rtx_COMPARE (CC_NZmode, tmp, const0_rtx);
+ emit_set_insn (cc_reg, tmp);
+ code = (code == LT ? NE : EQ);
+ goto done;
+
+ case LE:
+ case GT:
+ /* For GT, (x_hi >= 0) && ((x_hi | x_lo) != 0),
+ and of course the inverse for LE. */
+ emit_insn (gen_cmpdi (x_hi, const0_rtx));
+
+ tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+
+ /* Combine the two terms:
+ (GE ? (compare tmp 0) : EQ),
+ so that the whole term is true for NE, false for EQ. */
+ cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+ emit_insn (gen_ccmpccdi
+ (cc_reg, tmp, const0_rtx,
+ gen_rtx_GE (VOIDmode, cc_reg, const0_rtx),
+ GEN_INT (aarch64_nzcv_codes[AARCH64_EQ])));
+
+ /* The result is entirely within the Z bit. */
+ code = (code == GT ? NE : EQ);
+ goto done;
+
+ default:
+ break;
+ }
+ }
+ else
+ {
+ y_lo = operand_subword (y, 0, 0, TImode);
+ y_hi = operand_subword (y, 1, 0, TImode);
+ }
+
+ cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+ switch (code)
+ {
+ case EQ:
+ case NE:
+ /* For EQ, (x_lo == y_lo) && (x_hi == y_hi). */
+ emit_insn (gen_cmpdi (x_lo, y_lo));
+ emit_insn (gen_ccmpccdi (cc_reg, x_hi, y_hi,
+ gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+ GEN_INT (aarch64_nzcv_codes[AARCH64_NE])));
+ break;
+
+ case LEU:
+ case GTU:
+ std::swap (x_lo, y_lo);
+ std::swap (x_hi, y_hi);
+ code = swap_condition (code);
+ /* fall through */
+
+	case LTU:
+	case GEU:
+	  /* For LTU, (x - y), as double-word arithmetic.  */
+	  emit_insn (gen_cmpdi (x_lo, y_lo));
+	  /* The ucmp*_carryinC pattern uses zero_extend, and so cannot
+	     take the constant 0 we allow elsewhere.  Force to reg now
+	     and allow combine to eliminate via simplification.  */
+	  x_hi = force_reg (DImode, x_hi);
+	  y_hi = force_reg (DImode, y_hi);
+	  emit_insn (gen_ucmpdi3_carryinC (x_hi, y_hi));
+	  /* The result is entirely within the C bit.  */
+	  break;
+
+	case LE:
+	case GT:
+	  /*
+	   * For LE,
+	   * !((x_hi > y_hi) || (x_hi == y_hi && x_lo > y_lo))
+	   * -> !(x_hi > y_hi) && !(x_hi == y_hi && x_lo > y_lo)
+	   * -> (x_hi <= y_hi) && !(x_hi == y_hi && x_lo > y_lo)
+	   */
+
+	  /* Compute the first term (x_hi <= y_hi) and save it in tmp.  */
+	  tmp = gen_reg_rtx (SImode);
+	  emit_insn (gen_cmpdi (x_hi, y_hi));
+	  emit_set_insn (tmp, gen_rtx_LE (SImode, cc_reg, const0_rtx));
+
+	  /* Compute the second term (x_hi == y_hi && x_lo > y_lo):
+	     (EQ ? (compare x_lo y_lo) : LE),
+	     so that the whole term is true for GT, false for LE.  */
+	  emit_insn (gen_ccmpccdi (cc_reg, x_lo, y_lo,
+				   gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_LE])));
+
+	  /* Combine the two terms.  Since we want !(second_term):
+	     (LE ? (compare tmp 0) : EQ),
+	     so that the whole term is true for NE, false for EQ.  */
+	  emit_insn (gen_ccmpccsi (cc_reg, tmp, const0_rtx,
+				   gen_rtx_LE (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_EQ])));
+
+	  /* The result is entirely within the Z bit.  Map GT -> NE,
+	     LE -> EQ; code can only be LE or GT in this case.  */
+	  code = (code == GT ? NE : EQ);
+	  break;
+
+ case LT:
+ case GE:
+ /*
+ * For GE,
+ * !((x_hi < y_hi) || (x_hi == y_hi && x_lo < y_lo))
+ * -> !(x_hi < y_hi) && !(x_hi == y_hi && x_lo < y_lo)
+ * -> (x_hi >= y_hi) && !(x_hi == y_hi && x_lo < y_lo)
+ * and of course the inverse for LT.
+ */
+
+ /* Compute the first term (x_hi >= y_hi) and save it in tmp. */
+ tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_cmpdi (x_hi, y_hi));
+ emit_set_insn (tmp, gen_rtx_GE (SImode, cc_reg, const0_rtx));
+
+ /* Compute the second term (x_hi == y_hi && x_lo < y_lo):
+ (EQ ? (compare x_lo y_lo) : GE),
+ so that the whole term is true for LT, false for GE. */
+ emit_insn (gen_ccmpccdi (cc_reg, x_lo, y_lo,
+ gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+ GEN_INT (aarch64_nzcv_codes[AARCH64_GE])));
+
+ /* Combine the two terms. Since we want !(second_term):
+ (GE ? (compare tmp 0) : EQ),
+ so that the whole term is true for NE, false for EQ. */
+ emit_insn (gen_ccmpccsi (cc_reg, tmp, const0_rtx,
+ gen_rtx_GE (VOIDmode, cc_reg, const0_rtx),
+ GEN_INT (aarch64_nzcv_codes[AARCH64_EQ])));
+
+ /* The result is entirely within the Z bit. */
+ code = (code == GE ? NE : EQ);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
}
else
{
- cc_mode = SELECT_CC_MODE (code, x, y);
+ machine_mode cc_mode = SELECT_CC_MODE (code, x, y);
cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
}
+
+ done:
return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
}
@@ -471,6 +471,20 @@
operands[2] = const0_rtx;
})
+(define_expand "cbranchti4"
+ [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand:TI 1 "register_operand")
+ (match_operand:TI 2 "aarch64_reg_or_zero")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+{
+ operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+ operands[2]);
+ operands[1] = XEXP (operands[0], 0);
+ operands[2] = const0_rtx;
+})
+
(define_expand "cbranch<mode>4"
[(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
[(match_operand:GPF 1 "register_operand")
@@ -4144,6 +4158,20 @@
operands[3] = const0_rtx;
})
+(define_expand "cstoreti4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "aarch64_comparison_operator"
+ [(match_operand:TI 2 "register_operand")
+ (match_operand:TI 3 "aarch64_reg_or_zero")]))]
+ ""
+{
+ operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+ operands[3]);
+ PUT_MODE (operands[1], SImode);
+ operands[2] = XEXP (operands[1], 0);
+ operands[3] = const0_rtx;
+})
+
(define_expand "cstorecc4"
[(set (match_operand:SI 0 "register_operand")
(match_operator 1 "aarch64_comparison_operator_mode"