[committed,AArch64] Rework SVE integer comparisons
diff mbox series

Message ID mptr25onn9k.fsf@arm.com
State New
Headers show
Series
  • [committed,AArch64] Rework SVE integer comparisons
Related show

Commit Message

Richard Sandiford Aug. 14, 2019, 8:51 a.m. UTC
The remaining uses of UNSPEC_MERGE_PTRUE were in integer comparison
patterns.  These aren't actually merging operations but zeroing ones,
although there's no practical difference when the predicate is a PTRUE.

All comparisons produced by expand are predicated on a PTRUE,
although we try to pattern-match a compare-and-AND as a predicated
comparison during combine.

Like previous patches, this one rearranges things in a way that works
better with the ACLE, where the initial predicate might or might not
be a PTRUE.  The new patterns use UNSPEC_PRED_Z to represent zeroing
predication, with a aarch64_sve_ptrue_flag to record whether the
predicate is all-true (as for UNSPEC_PTEST).

See the block comment in the patch for more details.

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r274429.

Richard


2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/aarch64-protos.h (aarch64_sve_same_pred_for_ptest_p):
	Declare.
	* config/aarch64/aarch64.c (aarch64_sve_same_pred_for_ptest_p)
	(aarch64_sve_emit_int_cmp): New functions.
	(aarch64_convert_sve_data_to_pred): Use aarch64_sve_emit_int_cmp.
	(aarch64_sve_cmp_operand_p, aarch64_emit_sve_ptrue_op_cc): Delete.
	(aarch64_expand_sve_vec_cmp_int): Use aarch64_sve_emit_int_cmp.
	* config/aarch64/aarch64.md (UNSPEC_MERGE_PTRUE): Delete.
	(UNSPEC_PRED_Z): New unspec.
	(set_clobber_cc_nzc): Delete.
	* config/aarch64/aarch64-sve.md: Add a block comment about
	UNSPEC_PRED_Z.
	(*cmp<SVE_INT_CMP:cmp_op><mode>): Rename to...
	(@aarch64_pred_cmp<SVE_INT_CMP:cmp_op><mode>): ...this, replacing
	the old pattern with that name.  Use UNSPEC_PRED_Z instead of
	UNSPEC_MERGE_PTRUE.
	(*cmp<SVE_INT_CMP:cmp_op><mode>_cc): Use UNSPEC_PRED_Z instead of
	UNSPEC_MERGE_PTRUE.  Use aarch64_sve_same_pred_for_ptest_p to
	check for compatible predicates.
	(*cmp<cmp_op><SVE_INT_CMP:mode>_ptest): Likewise.
	(*cmp<cmp_op><mode>_and): Match a known-ptrue UNSPEC_PRED_Z instead
	of UNSPEC_MERGE_PTRUE.  Split into the new form of predicated
	comparisons above.

Patch
diff mbox series

Index: gcc/config/aarch64/aarch64-protos.h
===================================================================
--- gcc/config/aarch64/aarch64-protos.h	2019-08-14 09:15:57.609828019 +0100
+++ gcc/config/aarch64/aarch64-protos.h	2019-08-14 09:47:31.355831192 +0100
@@ -555,6 +555,7 @@  void aarch64_expand_mov_immediate (rtx,
 rtx aarch64_ptrue_reg (machine_mode);
 rtx aarch64_pfalse_reg (machine_mode);
 bool aarch64_sve_pred_dominates_p (rtx *, rtx);
+bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
 void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
 void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
 bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	2019-08-14 09:45:45.464613673 +0100
+++ gcc/config/aarch64/aarch64.c	2019-08-14 09:47:31.355831192 +0100
@@ -2783,6 +2783,48 @@  aarch64_sve_pred_dominates_p (rtx *pred1
 	  || rtx_equal_p (pred1[0], pred2));
 }
 
+/* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag
+   for it.  PRED2[0] is the predicate for the instruction whose result
+   is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag
+   for it.  Return true if we can prove that the two predicates are
+   equivalent for PTEST purposes; that is, if we can replace PRED2[0]
+   with PRED1[0] without changing behavior.  */
+
+bool
+aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2)
+{
+  machine_mode mode = GET_MODE (pred1[0]);
+  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+	      && mode == GET_MODE (pred2[0])
+	      && aarch64_sve_ptrue_flag (pred1[1], SImode)
+	      && aarch64_sve_ptrue_flag (pred2[1], SImode));
+
+  bool ptrue1_p = (pred1[0] == CONSTM1_RTX (mode)
+		   || INTVAL (pred1[1]) == SVE_KNOWN_PTRUE);
+  bool ptrue2_p = (pred2[0] == CONSTM1_RTX (mode)
+		   || INTVAL (pred2[1]) == SVE_KNOWN_PTRUE);
+  return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]);
+}
+
+/* Emit a comparison CMP between OP0 and OP1, both of which have mode
+   DATA_MODE, and return the result in a predicate of mode PRED_MODE.
+   Use TARGET as the target register if nonnull and convenient.  */
+
+static rtx
+aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
+			  machine_mode data_mode, rtx op1, rtx op2)
+{
+  insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode);
+  expand_operand ops[5];
+  create_output_operand (&ops[0], target, pred_mode);
+  create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode);
+  create_integer_operand (&ops[2], SVE_KNOWN_PTRUE);
+  create_input_operand (&ops[3], op1, data_mode);
+  create_input_operand (&ops[4], op2, data_mode);
+  expand_insn (icode, 5, ops);
+  return ops[0].value;
+}
+
 /* Use a comparison to convert integer vector SRC into MODE, which is
    the corresponding SVE predicate mode.  Use TARGET for the result
    if it's nonnull and convenient.  */
@@ -2791,14 +2833,8 @@  aarch64_sve_pred_dominates_p (rtx *pred1
 aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
 {
   machine_mode src_mode = GET_MODE (src);
-  insn_code icode = code_for_aarch64_pred_cmp (NE, src_mode);
-  expand_operand ops[4];
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], CONSTM1_RTX (mode), mode);
-  create_input_operand (&ops[2], src, src_mode);
-  create_input_operand (&ops[3], CONST0_RTX (src_mode), src_mode);
-  expand_insn (icode, 4, ops);
-  return ops[0].value;
+  return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode,
+				   src, CONST0_RTX (src_mode));
 }
 
 /* Return true if we can move VALUE into a register using a single
@@ -17667,51 +17703,6 @@  aarch64_reverse_mask (machine_mode mode,
   return force_reg (V16QImode, mask);
 }
 
-/* Return true if X is a valid second operand for the SVE instruction
-   that implements integer comparison OP_CODE.  */
-
-static bool
-aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
-{
-  if (register_operand (x, VOIDmode))
-    return true;
-
-  switch (op_code)
-    {
-    case LTU:
-    case LEU:
-    case GEU:
-    case GTU:
-      return aarch64_sve_cmp_immediate_p (x, false);
-    case LT:
-    case LE:
-    case GE:
-    case GT:
-    case NE:
-    case EQ:
-      return aarch64_sve_cmp_immediate_p (x, true);
-    default:
-      gcc_unreachable ();
-    }
-}
-
-/* Use predicated SVE instructions to implement the equivalent of:
-
-     (set TARGET OP)
-
-   given that PTRUE is an all-true predicate of the appropriate mode
-   and that the instruction clobbers the condition codes.  */
-
-static void
-aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
-{
-  rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
-			       gen_rtvec (2, ptrue, op),
-			       UNSPEC_MERGE_PTRUE);
-  rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec));
-  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
-}
-
 /* Expand an SVE integer comparison using the SVE equivalent of:
 
      (set TARGET (CODE OP0 OP1)).  */
@@ -17721,13 +17712,10 @@  aarch64_expand_sve_vec_cmp_int (rtx targ
 {
   machine_mode pred_mode = GET_MODE (target);
   machine_mode data_mode = GET_MODE (op0);
-
-  if (!aarch64_sve_cmp_operand_p (code, op1))
-    op1 = force_reg (data_mode, op1);
-
-  rtx ptrue = aarch64_ptrue_reg (pred_mode);
-  rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-  aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
+  rtx res = aarch64_sve_emit_int_cmp (target, pred_mode, code, data_mode,
+				      op0, op1);
+  if (!rtx_equal_p (target, res))
+    emit_move_insn (target, res);
 }
 
 /* Return the UNSPEC_COND_* code for comparison CODE.  */
Index: gcc/config/aarch64/aarch64.md
===================================================================
--- gcc/config/aarch64/aarch64.md	2019-08-14 09:45:45.464613673 +0100
+++ gcc/config/aarch64/aarch64.md	2019-08-14 09:47:31.359831162 +0100
@@ -219,8 +219,8 @@  (define_c_enum "unspec" [
     UNSPEC_LD1RQ
     UNSPEC_LD1_GATHER
     UNSPEC_ST1_SCATTER
-    UNSPEC_MERGE_PTRUE
     UNSPEC_PRED_X
+    UNSPEC_PRED_Z
     UNSPEC_PTEST
     UNSPEC_UNPACKSHI
     UNSPEC_UNPACKUHI
@@ -7155,12 +7155,6 @@  (define_insn "bti_jc"
   [(set_attr "type" "no_insn")]
 )
 
-;; Helper for aarch64.c code.
-(define_expand "set_clobber_cc_nzc"
-  [(parallel [(set (match_operand 0)
-		   (match_operand 1))
-	      (clobber (reg:CC_NZC CC_REGNUM))])])
-
 ;; Hard speculation barrier.
 (define_insn "speculation_barrier"
   [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)]
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md	2019-08-14 09:45:45.460613703 +0100
+++ gcc/config/aarch64/aarch64-sve.md	2019-08-14 09:47:31.355831192 +0100
@@ -24,6 +24,7 @@ 
 ;; == General notes
 ;; ---- Note on the handling of big-endian SVE
 ;; ---- Description of UNSPEC_PTEST
+;; ---- Description of UNSPEC_PRED_Z
 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
 ;;
@@ -231,6 +232,52 @@ 
 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
 ;;
 ;; -------------------------------------------------------------------------
+;; ---- Description of UNSPEC_PRED_Z
+;; -------------------------------------------------------------------------
+;;
+;; SVE integer comparisons are predicated and return zero for inactive
+;; lanes.  Sometimes we use them with predicates that are all-true and
+;; sometimes we use them with general predicates.
+;;
+;; The integer comparisons also set the flags and so build-in the effect
+;; of a PTEST.  We therefore want to be able to combine integer comparison
+;; patterns with PTESTs of the result.  One difficulty with doing this is
+;; that (as noted above) the PTEST is always a .B operation and so can place
+;; stronger requirements on the governing predicate than the comparison does.
+;;
+;; For example, when applying a separate PTEST to the result of a full-vector
+;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
+;; .B PTRUE.  In constrast, the comparison might be predicated on either
+;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
+;; bits don't matter for .H operations.
+;;
+;; We therefore can't rely on a full-vector comparison using the same
+;; predicate register as a following PTEST.  We instead need to remember
+;; whether a comparison is known to be a full-vector comparison and use
+;; this information in addition to a check for equal predicate registers.
+;; At the same time, it's useful to have a common representation for all
+;; integer comparisons, so that they can be handled by a single set of
+;; patterns.
+;;
+;; We therefore take a similar approach to UNSPEC_PTEST above and use:
+;;
+;;   (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
+;;
+;; where:
+;;
+;; - GP is the governing predicate, of mode <M:VPRED>
+;;
+;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
+;;   SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
+;;   otherwise
+;;
+;; - CODE is the comparison code
+;;
+;; - OP0 and OP1 are the values being compared, of mode M
+;;
+;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
+;;
+;; -------------------------------------------------------------------------
 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
 ;; -------------------------------------------------------------------------
 ;;
@@ -3008,115 +3055,119 @@  (define_expand "vec_cmpu<mode><vpred>"
   }
 )
 
-;; Integer comparisons predicated with a PTRUE.
-(define_insn "*cmp<cmp_op><mode>"
+;; Predicated integer comparisons.
+(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
 	(unspec:<VPRED>
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
 	   (SVE_INT_CMP:<VPRED>
-	     (match_operand:SVE_I 2 "register_operand" "w, w")
-	     (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
-	  UNSPEC_MERGE_PTRUE))
+	     (match_operand:SVE_I 3 "register_operand" "w, w")
+	     (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+	  UNSPEC_PRED_Z))
    (clobber (reg:CC_NZC CC_REGNUM))]
   "TARGET_SVE"
   "@
-   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
-   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+   cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
+   cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
 )
 
-;; Integer comparisons predicated with a PTRUE in which both the flag and
-;; predicate results are interesting.
-(define_insn "*cmp<cmp_op><mode>_cc"
+;; Predicated integer comparisons in which both the flag and predicate
+;; results are interesting.
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
   [(set (reg:CC_NZC CC_REGNUM)
 	(unspec:CC_NZC
 	  [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
 	   (match_operand 4)
 	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
 	   (unspec:<VPRED>
-	     [(match_dup 4)
+	     [(match_operand 6)
+	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
 	      (SVE_INT_CMP:<VPRED>
 		(match_operand:SVE_I 2 "register_operand" "w, w")
 		(match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
-	     UNSPEC_MERGE_PTRUE)]
+	     UNSPEC_PRED_Z)]
 	  UNSPEC_PTEST))
    (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
 	(unspec:<VPRED>
-	  [(match_dup 4)
+	  [(match_dup 6)
+	   (match_dup 7)
 	   (SVE_INT_CMP:<VPRED>
 	     (match_dup 2)
 	     (match_dup 3))]
-	  UNSPEC_MERGE_PTRUE))]
-  "TARGET_SVE"
+	  UNSPEC_PRED_Z))]
+  "TARGET_SVE
+   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
   "@
    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+  "&& !rtx_equal_p (operands[4], operands[6])"
+  {
+    operands[6] = copy_rtx (operands[4]);
+    operands[7] = operands[5];
+  }
 )
 
-;; Integer comparisons predicated with a PTRUE in which only the flags result
-;; is interesting.
-(define_insn "*cmp<cmp_op><mode>_ptest"
+;; Predicated integer comparisons in which only the flags result is
+;; interesting.
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
   [(set (reg:CC_NZC CC_REGNUM)
 	(unspec:CC_NZC
 	  [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
 	   (match_operand 4)
 	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
 	   (unspec:<VPRED>
-	     [(match_dup 4)
+	     [(match_operand 6)
+	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
 	      (SVE_INT_CMP:<VPRED>
 		(match_operand:SVE_I 2 "register_operand" "w, w")
 		(match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
-	     UNSPEC_MERGE_PTRUE)]
+	     UNSPEC_PRED_Z)]
 	  UNSPEC_PTEST))
    (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
-  "TARGET_SVE"
+  "TARGET_SVE
+   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
   "@
    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+  "&& !rtx_equal_p (operands[4], operands[6])"
+  {
+    operands[6] = copy_rtx (operands[4]);
+    operands[7] = operands[5];
+  }
 )
 
 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
 ;; comparison with an AND.  Split the instruction into its preferred form
-;; (below) at the earliest opportunity, in order to get rid of the
-;; redundant operand 1.
-(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
+;; at the earliest opportunity, in order to get rid of the redundant
+;; operand 4.
+(define_insn_and_split "*cmp<cmp_op><mode>_and"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
-       (and:<VPRED>
-	 (unspec:<VPRED>
-	   [(match_operand:<VPRED> 1)
-	    (SVE_INT_CMP:<VPRED>
-	      (match_operand:SVE_I 2 "register_operand" "w, w")
-	      (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
-	   UNSPEC_MERGE_PTRUE)
-	 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
+	(and:<VPRED>
+	  (unspec:<VPRED>
+	    [(match_operand 4)
+	     (const_int SVE_KNOWN_PTRUE)
+	     (SVE_INT_CMP:<VPRED>
+	       (match_operand:SVE_I 2 "register_operand" "w, w")
+	       (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+	    UNSPEC_PRED_Z)
+	  (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
    (clobber (reg:CC_NZC CC_REGNUM))]
   "TARGET_SVE"
   "#"
   "&& 1"
   [(parallel
      [(set (match_dup 0)
-	  (and:<VPRED>
-	    (SVE_INT_CMP:<VPRED>
-	      (match_dup 2)
-	      (match_dup 3))
-	    (match_dup 4)))
+	   (unspec:<VPRED>
+	     [(match_dup 1)
+	      (const_int SVE_MAYBE_NOT_PTRUE)
+	      (SVE_INT_CMP:<VPRED>
+		(match_dup 2)
+		(match_dup 3))]
+	     UNSPEC_PRED_Z))
       (clobber (reg:CC_NZC CC_REGNUM))])]
 )
 
-;; Predicated integer comparisons.
-(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
-  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
-	(and:<VPRED>
-	  (SVE_INT_CMP:<VPRED>
-	    (match_operand:SVE_I 2 "register_operand" "w, w")
-	    (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
-	  (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
-   (clobber (reg:CC_NZC CC_REGNUM))]
-  "TARGET_SVE"
-  "@
-   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
-   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] While tests
 ;; -------------------------------------------------------------------------