diff mbox series

[6/6] aarch64: Implement TImode comparisons

Message ID 20200319064805.17739-7-richard.henderson@linaro.org
State New
Headers show
Series aarch64: Implement TImode comparisons | expand

Commit Message

Jeff Law via Gcc-patches March 19, 2020, 6:48 a.m. UTC
Use ccmp to perform all TImode comparisons branchless.

	* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Expand all of
	the comparisons for TImode, not just NE.
	* config/aarch64/aarch64.md (cbranchti4, cstoreti4): New.
---
 gcc/config/aarch64/aarch64.c  | 182 +++++++++++++++++++++++++++++++---
 gcc/config/aarch64/aarch64.md |  28 ++++++
 2 files changed, 196 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d7899dad759..911dc1c91cd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2363,32 +2363,186 @@  rtx
 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 {
   machine_mode cmp_mode = GET_MODE (x);
-  machine_mode cc_mode;
   rtx cc_reg;
 
   if (cmp_mode == TImode)
     {
-      gcc_assert (code == NE);
-
-      cc_mode = CCmode;
-      cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
-
       rtx x_lo = operand_subword (x, 0, 0, TImode);
-      rtx y_lo = operand_subword (y, 0, 0, TImode);
-      emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
-
       rtx x_hi = operand_subword (x, 1, 0, TImode);
-      rtx y_hi = operand_subword (y, 1, 0, TImode);
-      emit_insn (gen_ccmpccdi (cc_reg, x_hi, y_hi,
-			       gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
-			       GEN_INT (aarch64_nzcv_codes[AARCH64_NE])));
+      rtx y_lo, y_hi, tmp;
+
+      if (y == const0_rtx)
+	{
+	  y_lo = y_hi = y;
+	  switch (code)
+	    {
+	    case EQ:
+	    case NE:
+	      /* For equality, IOR the two halves together.  If this gets
+		 used for a branch, we expect this to fold to cbz/cbnz;
+		 otherwise it's no larger than cmp+ccmp below.  Beware of
+		 the compare-and-swap post-reload split and use cmp+ccmp.  */
+	      if (!can_create_pseudo_p ())
+		break;
+	      tmp = gen_reg_rtx (DImode);
+	      emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+	      emit_insn (gen_cmpdi (tmp, const0_rtx));
+	      cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+	      goto done;
+
+	    case LT:
+	    case GE:
+	      /* Check only the sign bit.  Choose to expose this detail,
+		 lest something later tries to use a COMPARE in a way
+		 that doesn't correspond.  This is "tst".  */
+	      cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+	      tmp = gen_rtx_AND (DImode, x_hi, GEN_INT (INT64_MIN));
+	      tmp = gen_rtx_COMPARE (CC_NZmode, tmp, const0_rtx);
+	      emit_set_insn (cc_reg, tmp);
+	      code = (code == LT ? NE : EQ);
+	      goto done;
+
+	    case LE:
+	    case GT:
+	      /* For GT, (x_hi >= 0) && ((x_hi | x_lo) != 0),
+		 and of course the inverse for LE.  */
+	      emit_insn (gen_cmpdi (x_hi, const0_rtx));
+
+	      tmp = gen_reg_rtx (DImode);
+	      emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+
+	      /* Combine the two terms:
+		 (GE ? (compare tmp 0) : EQ),
+		 so that the whole term is true for NE, false for EQ.  */
+	      cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+	      emit_insn (gen_ccmpccdi
+			 (cc_reg, tmp, const0_rtx,
+			  gen_rtx_GE (VOIDmode, cc_reg, const0_rtx),
+			  GEN_INT (aarch64_nzcv_codes[AARCH64_EQ])));
+
+	      /* The result is entirely within the Z bit.  */
+	      code = (code == GT ? NE : EQ);
+	      goto done;
+
+	    default:
+	      break;
+	    }
+	}
+      else
+	{
+	  y_lo = operand_subword (y, 0, 0, TImode);
+	  y_hi = operand_subword (y, 1, 0, TImode);
+	}
+
+      cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+      switch (code)
+	{
+	case EQ:
+	case NE:
+	  /* For EQ, (x_lo == y_lo) && (x_hi == y_hi).  */
+	  emit_insn (gen_cmpdi (x_lo, y_lo));
+	  emit_insn (gen_ccmpccdi (cc_reg, x_hi, y_hi,
+				   gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_NE])));
+	  break;
+
+	case LEU:
+	case GTU:
+	  std::swap (x_lo, y_lo);
+	  std::swap (x_hi, y_hi);
+	  code = swap_condition (code);
+	  /* fall through */
+
+	case LTU:
+	case GEU:
+	  /* For LTU, (x - y), as double-word arithmetic.  */
+	  emit_insn (gen_cmpdi (x_lo, y_lo));
+	  /* The ucmp*_carryinC pattern uses zero_extend, and so cannot
+	     take the constant 0 we allow elsewhere.  Force to reg now
+	     and allow combine to eliminate via simplification.  */
+	  x_hi = force_reg (DImode, x_hi);
+	  y_hi = force_reg (DImode, y_hi);
+	  emit_insn (gen_ucmpdi3_carryinC (x_hi, y_hi));
+	  /* The result is entirely within the C bit.  */
+	  break;
+
+	case LE:
+	case GT:
+	  /* For LE,
+	        !((x_hi > y_hi) || (x_hi == y_hi && x_lo > y_lo))
+	     -> !(x_hi > y_hi) && !(x_hi == y_hi && x_lo > y_lo)
+	     -> (x_hi <= y_hi) && !(x_hi == y_hi && x_lo > y_lo)
+	     and of course the inverse for GT; this mirrors the
+	     LT/GE case below.  */
+
+	  /* Compute the first term (x_hi <= y_hi) and save it in tmp.  */
+	  tmp = gen_reg_rtx (SImode);
+	  emit_insn (gen_cmpdi (x_hi, y_hi));
+	  emit_set_insn (tmp, gen_rtx_LE (SImode, cc_reg, const0_rtx));
+
+	  /* Compute the second term (x_hi == y_hi && x_lo > y_lo):
+	     (EQ ? (compare x_lo y_lo) : LE),
+	     so that the whole term is true for GT, false for LE.  */
+	  emit_insn (gen_ccmpccdi (cc_reg, x_lo, y_lo,
+				   gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_LE])));
+
+	  /* Combine the two terms.  Since we want !(second_term):
+	     (LE ? (compare tmp 0) : EQ),
+	     so that the whole term is true for NE, false for EQ.  */
+	  emit_insn (gen_ccmpccsi (cc_reg, tmp, const0_rtx,
+				   gen_rtx_LE (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_EQ])));
+
+	  /* The result is entirely within the Z bit.  */
+	  code = (code == LE ? NE : EQ);
+	  break;
+
+	case LT:
+	case GE:
+	  /* For GE,
+	        !((x_hi < y_hi) || (x_hi == y_hi && x_lo < y_lo))
+	     -> !(x_hi < y_hi) && !(x_hi == y_hi && x_lo < y_lo)
+	     -> (x_hi >= y_hi) && !(x_hi == y_hi && x_lo < y_lo)
+	     and of course the inverse for LT.  The first term is
+	     computed into TMP as a boolean; the second term and the
+	     combination are computed with conditional compares.  */
+
+	  /* Compute the first term (x_hi >= y_hi) and save it in tmp.  */
+	  tmp = gen_reg_rtx (SImode);
+	  emit_insn (gen_cmpdi (x_hi, y_hi));
+	  emit_set_insn (tmp, gen_rtx_GE (SImode, cc_reg, const0_rtx));
+
+	  /* Compute the second term (x_hi == y_hi && x_lo < y_lo):
+	     (EQ ? (compare x_lo y_lo) : GE),
+	     so that the whole term is true for LT, false for GE.  */
+	  emit_insn (gen_ccmpccdi (cc_reg, x_lo, y_lo,
+				   gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_GE])));
+
+	  /* Combine the two terms.  Since we want !(second_term):
+	     (GE ? (compare tmp 0) : EQ),
+	     so that the whole term is true for NE, false for EQ.  */
+	  emit_insn (gen_ccmpccsi (cc_reg, tmp, const0_rtx,
+				   gen_rtx_GE (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_EQ])));
+
+	  /* The result is entirely within the Z bit.  */
+	  code = (code == GE ? NE : EQ);
+	  break;
+
+	default:
+	  gcc_unreachable ();
+	}
     }
   else
     {
-      cc_mode = SELECT_CC_MODE (code, x, y);
+      machine_mode cc_mode = SELECT_CC_MODE (code, x, y);
       cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
       emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
     }
+
+ done:
   return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
 }
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c789b641e7c..fb076b60e3c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -471,6 +471,20 @@ 
   operands[2] = const0_rtx;
 })
 
+(define_expand "cbranchti4"
+  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+			    [(match_operand:TI 1 "register_operand")
+			     (match_operand:TI 2 "aarch64_reg_or_zero")])
+			   (label_ref (match_operand 3 "" ""))
+			   (pc)))]
+  ""
+{
+  operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+					 operands[2]);
+  operands[1] = XEXP (operands[0], 0);
+  operands[2] = const0_rtx;
+})
+
 (define_expand "cbranch<mode>4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
 			    [(match_operand:GPF 1 "register_operand")
@@ -4144,6 +4158,20 @@ 
   operands[3] = const0_rtx;
 })
 
+(define_expand "cstoreti4"
+  [(set (match_operand:SI 0 "register_operand")
+	(match_operator:SI 1 "aarch64_comparison_operator"
+	 [(match_operand:TI 2 "register_operand")
+	  (match_operand:TI 3 "aarch64_reg_or_zero")]))]
+  ""
+{
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+				         operands[3]);
+  PUT_MODE (operands[1], SImode);
+  operands[2] = XEXP (operands[1], 0);
+  operands[3] = const0_rtx;
+})
+
 (define_expand "cstorecc4"
   [(set (match_operand:SI 0 "register_operand")
        (match_operator 1 "aarch64_comparison_operator_mode"