[committed,AArch64] Rework SVE FP comparisons

Message ID mptblwsp2rj.fsf@arm.com
State New
Series [committed,AArch64] Rework SVE FP comparisons

Commit Message

Richard Sandiford Aug. 14, 2019, 8:30 a.m. UTC
This patch rewrites the SVE FP comparisons so that they always use
unspecs and so that they have an additional operand to indicate
whether the predicate is known to be a PTRUE.  It's part of a series
that rewrites the SVE FP patterns so that they can cope with non-PTRUE
predicates.
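
Concretely (shapes condensed from the patterns below, with mode
annotations omitted): the ordered comparison patterns previously
wrapped a comparison rtx in UNSPEC_MERGE_PTRUE:

    (unspec [pred (lt op0 op1)] UNSPEC_MERGE_PTRUE)

whereas they now represent the comparison directly as an unspec, with
an extra operand that says whether the governing predicate is known
to be all-true:

    (unspec [pred (const_int SVE_KNOWN_PTRUE) op0 op1] UNSPEC_COND_FCMLT)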

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r274421.

Richard


2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (UNSPEC_COND_FCMUO): New unspec.
	(cmp_op): Handle it.
	(SVE_COND_FP_CMP): Rename to...
	(SVE_COND_FP_CMP_I0): ...this.
	(SVE_FP_CMP): Remove.
	* config/aarch64/aarch64-sve.md
	(*fcm<SVE_FP_CMP:cmp_op><SVE_F:mode>): Replace with...
	(*fcm<SVE_COND_FP_CMP_I0:cmp_op><SVE_F:mode>): ...this new pattern,
	using unspecs to represent the comparison.
	(*fcmuo<SVE_F:mode>): Use UNSPEC_COND_FCMUO.
	(*fcm<cmp_op><mode>_and_combine, *fcmuo<mode>_and_combine): Update
	accordingly.
	* config/aarch64/aarch64.c (aarch64_emit_sve_ptrue_op): Delete.
	(aarch64_unspec_cond_code): Move after integer code.  Handle
	UNORDERED.
	(aarch64_emit_sve_predicated_cond): Replace with...
	(aarch64_emit_sve_fp_cond): ...this new function.
	(aarch64_emit_sve_or_conds): Replace with...
	(aarch64_emit_sve_or_fp_conds): ...this new function.
	(aarch64_emit_sve_inverted_cond): Replace with...
	(aarch64_emit_sve_invert_fp_cond): ...this new function.
	(aarch64_expand_sve_vec_cmp_float): Update accordingly.
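
To illustrate the aarch64.c interface change (a condensed sketch; the
full definitions are in the patch below): the old emit helpers took an
all-true predicate and a prebuilt comparison rtx, whereas the new
aarch64_emit_sve_fp_cond takes the comparison code, an arbitrary
predicate, and an explicit flag saying whether that predicate is known
to be a PTRUE:

    /* Comparison governed by a known all-true predicate.  */
    aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);

    /* Comparison governed by a possibly-partial predicate, such as
       the "ordered" mask used for the UN* comparisons.  */
    aarch64_emit_sve_fp_cond (target, code, ordered, false, op0, op1);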

Patch

Index: gcc/config/aarch64/iterators.md
===================================================================
--- gcc/config/aarch64/iterators.md	2019-08-14 09:25:49.689451157 +0100
+++ gcc/config/aarch64/iterators.md	2019-08-14 09:29:14.195939545 +0100
@@ -479,6 +479,7 @@  (define_c_enum "unspec"
     UNSPEC_COND_FCMLE	; Used in aarch64-sve.md.
     UNSPEC_COND_FCMLT	; Used in aarch64-sve.md.
     UNSPEC_COND_FCMNE	; Used in aarch64-sve.md.
+    UNSPEC_COND_FCMUO	; Used in aarch64-sve.md.
     UNSPEC_COND_FDIV	; Used in aarch64-sve.md.
     UNSPEC_COND_FMAXNM	; Used in aarch64-sve.md.
     UNSPEC_COND_FMINNM	; Used in aarch64-sve.md.
@@ -1273,9 +1274,6 @@  (define_code_iterator SVE_UNPRED_FP_BINA
 ;; SVE integer comparisons.
 (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
 
-;; SVE floating-point comparisons.
-(define_code_iterator SVE_FP_CMP [lt le eq ne ge gt])
-
 ;; -------------------------------------------------------------------
 ;; Code Attributes
 ;; -------------------------------------------------------------------
@@ -1663,12 +1661,13 @@  (define_int_iterator SVE_COND_FP_TERNARY
 					  UNSPEC_COND_FNMLA
 					  UNSPEC_COND_FNMLS])
 
-(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_FCMEQ
-				      UNSPEC_COND_FCMGE
-				      UNSPEC_COND_FCMGT
-				      UNSPEC_COND_FCMLE
-				      UNSPEC_COND_FCMLT
-				      UNSPEC_COND_FCMNE])
+;; SVE FP comparisons that accept #0.0.
+(define_int_iterator SVE_COND_FP_CMP_I0 [UNSPEC_COND_FCMEQ
+					 UNSPEC_COND_FCMGE
+					 UNSPEC_COND_FCMGT
+					 UNSPEC_COND_FCMLE
+					 UNSPEC_COND_FCMLT
+					 UNSPEC_COND_FCMNE])
 
 (define_int_iterator FCADD [UNSPEC_FCADD90
 			    UNSPEC_FCADD270])
@@ -1955,7 +1954,8 @@  (define_int_attr cmp_op [(UNSPEC_COND_FC
 			 (UNSPEC_COND_FCMGT "gt")
 			 (UNSPEC_COND_FCMLE "le")
 			 (UNSPEC_COND_FCMLT "lt")
-			 (UNSPEC_COND_FCMNE "ne")])
+			 (UNSPEC_COND_FCMNE "ne")
+			 (UNSPEC_COND_FCMUO "uo")])
 
 (define_int_attr sve_int_op [(UNSPEC_ANDV "andv")
 			     (UNSPEC_IORV "orv")
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md	2019-08-14 09:25:49.685451187 +0100
+++ gcc/config/aarch64/aarch64-sve.md	2019-08-14 09:29:14.191939575 +0100
@@ -3136,15 +3136,15 @@  (define_expand "vec_cmp<mode><vpred>"
   }
 )
 
-;; Floating-point comparisons predicated with a PTRUE.
+;; Predicated floating-point comparisons.
 (define_insn "*fcm<cmp_op><mode>"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
 	(unspec:<VPRED>
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-	   (SVE_FP_CMP:<VPRED>
-	     (match_operand:SVE_F 2 "register_operand" "w, w")
-	     (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
-	  UNSPEC_MERGE_PTRUE))]
+	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+	   (match_operand:SVE_F 2 "register_operand" "w, w")
+	   (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+	  SVE_COND_FP_CMP_I0))]
   "TARGET_SVE"
   "@
    fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
@@ -3156,10 +3156,10 @@  (define_insn "*fcmuo<mode>"
   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
 	(unspec:<VPRED>
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
-	   (unordered:<VPRED>
-	     (match_operand:SVE_F 2 "register_operand" "w")
-	     (match_operand:SVE_F 3 "register_operand" "w"))]
-	  UNSPEC_MERGE_PTRUE))]
+	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+	   (match_operand:SVE_F 2 "register_operand" "w")
+	   (match_operand:SVE_F 3 "register_operand" "w")]
+	  UNSPEC_COND_FCMUO))]
   "TARGET_SVE"
   "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
 )
@@ -3177,20 +3177,21 @@  (define_insn_and_split "*fcm<cmp_op><mod
 	(and:<VPRED>
 	  (unspec:<VPRED>
 	    [(match_operand:<VPRED> 1)
-	     (SVE_FP_CMP
-	       (match_operand:SVE_F 2 "register_operand" "w, w")
-	       (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
-	    UNSPEC_MERGE_PTRUE)
+	     (const_int SVE_KNOWN_PTRUE)
+	     (match_operand:SVE_F 2 "register_operand" "w, w")
+	     (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+	    SVE_COND_FP_CMP_I0)
 	  (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
   "TARGET_SVE"
   "#"
   "&& 1"
   [(set (match_dup 0)
-	(and:<VPRED>
-	  (SVE_FP_CMP:<VPRED>
-	    (match_dup 2)
-	    (match_dup 3))
-	  (match_dup 4)))]
+	(unspec:<VPRED>
+	  [(match_dup 4)
+	   (const_int SVE_MAYBE_NOT_PTRUE)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  SVE_COND_FP_CMP_I0))]
 )
 
 ;; Same for unordered comparisons.
@@ -3199,62 +3200,21 @@  (define_insn_and_split "*fcmuo<mode>_and
 	(and:<VPRED>
 	  (unspec:<VPRED>
 	    [(match_operand:<VPRED> 1)
-	     (unordered
-	       (match_operand:SVE_F 2 "register_operand" "w")
-	       (match_operand:SVE_F 3 "register_operand" "w"))]
-	    UNSPEC_MERGE_PTRUE)
+	     (const_int SVE_KNOWN_PTRUE)
+	     (match_operand:SVE_F 2 "register_operand" "w")
+	     (match_operand:SVE_F 3 "register_operand" "w")]
+	    UNSPEC_COND_FCMUO)
 	  (match_operand:<VPRED> 4 "register_operand" "Upl")))]
   "TARGET_SVE"
   "#"
   "&& 1"
   [(set (match_dup 0)
-	(and:<VPRED>
-	  (unordered:<VPRED>
-	    (match_dup 2)
-	    (match_dup 3))
-	  (match_dup 4)))]
-)
-
-;; Unpredicated floating-point comparisons, with the results ANDed with
-;; another predicate.  This is a valid fold for the same reasons as above.
-(define_insn "*fcm<cmp_op><mode>_and"
-  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
-	(and:<VPRED>
-	  (SVE_FP_CMP:<VPRED>
-	    (match_operand:SVE_F 2 "register_operand" "w, w")
-	    (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
-	  (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
-  "TARGET_SVE"
-  "@
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Same for unordered comparisons.
-(define_insn "*fcmuo<mode>_and"
-  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
-	(and:<VPRED>
-	  (unordered:<VPRED>
-	    (match_operand:SVE_F 2 "register_operand" "w")
-	    (match_operand:SVE_F 3 "register_operand" "w"))
-	  (match_operand:<VPRED> 1 "register_operand" "Upl")))]
-  "TARGET_SVE"
-  "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Predicated floating-point comparisons.  We don't need a version
-;; of this for unordered comparisons.
-(define_insn "*pred_fcm<cmp_op><mode>"
-  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
 	(unspec:<VPRED>
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-	   (match_operand:SVE_F 2 "register_operand" "w, w")
-	   (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
-	  SVE_COND_FP_CMP))]
-  "TARGET_SVE"
-  "@
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
-   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+	  [(match_dup 4)
+	   (const_int SVE_MAYBE_NOT_PTRUE)
+	   (match_dup 2)
+	   (match_dup 3)]
+	  UNSPEC_COND_FCMUO))]
 )
 
 ;; -------------------------------------------------------------------------
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	2019-08-14 09:15:57.617827961 +0100
+++ gcc/config/aarch64/aarch64.c	2019-08-14 09:29:14.195939545 +0100
@@ -17700,28 +17700,35 @@  aarch64_sve_cmp_operand_p (rtx_code op_c
 
      (set TARGET OP)
 
-   given that PTRUE is an all-true predicate of the appropriate mode.  */
+   given that PTRUE is an all-true predicate of the appropriate mode
+   and that the instruction clobbers the condition codes.  */
 
 static void
-aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
+aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
 {
   rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
 			       gen_rtvec (2, ptrue, op),
 			       UNSPEC_MERGE_PTRUE);
-  rtx_insn *insn = emit_set_insn (target, unspec);
+  rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec));
   set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
 }
 
-/* Likewise, but also clobber the condition codes.  */
+/* Expand an SVE integer comparison using the SVE equivalent of:
 
-static void
-aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
+     (set TARGET (CODE OP0 OP1)).  */
+
+void
+aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
 {
-  rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
-			       gen_rtvec (2, ptrue, op),
-			       UNSPEC_MERGE_PTRUE);
-  rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec));
-  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
+  machine_mode pred_mode = GET_MODE (target);
+  machine_mode data_mode = GET_MODE (op0);
+
+  if (!aarch64_sve_cmp_operand_p (code, op1))
+    op1 = force_reg (data_mode, op1);
+
+  rtx ptrue = aarch64_ptrue_reg (pred_mode);
+  rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
+  aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
 }
 
 /* Return the UNSPEC_COND_* code for comparison CODE.  */
@@ -17743,6 +17750,8 @@  aarch64_unspec_cond_code (rtx_code code)
       return UNSPEC_COND_FCMLE;
     case GE:
       return UNSPEC_COND_FCMGE;
+    case UNORDERED:
+      return UNSPEC_COND_FCMUO;
     default:
       gcc_unreachable ();
     }
@@ -17750,78 +17759,58 @@  aarch64_unspec_cond_code (rtx_code code)
 
 /* Emit:
 
-      (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
+      (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
 
-   where <X> is the operation associated with comparison CODE.  This form
-   of instruction is used when (and (CODE OP0 OP1) PRED) would have different
-   semantics, such as when PRED might not be all-true and when comparing
-   inactive lanes could have side effects.  */
+   where <X> is the operation associated with comparison CODE.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
-				  rtx pred, rtx op0, rtx op1)
+aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred,
+			  bool known_ptrue_p, rtx op0, rtx op1)
 {
+  rtx flag = gen_int_mode (known_ptrue_p, SImode);
   rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
-			       gen_rtvec (3, pred, op0, op1),
+			       gen_rtvec (4, pred, flag, op0, op1),
 			       aarch64_unspec_cond_code (code));
   emit_set_insn (target, unspec);
 }
 
-/* Expand an SVE integer comparison using the SVE equivalent of:
-
-     (set TARGET (CODE OP0 OP1)).  */
-
-void
-aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
-{
-  machine_mode pred_mode = GET_MODE (target);
-  machine_mode data_mode = GET_MODE (op0);
-
-  if (!aarch64_sve_cmp_operand_p (code, op1))
-    op1 = force_reg (data_mode, op1);
-
-  rtx ptrue = aarch64_ptrue_reg (pred_mode);
-  rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-  aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
-}
-
 /* Emit the SVE equivalent of:
 
-      (set TMP1 (CODE1 OP0 OP1))
-      (set TMP2 (CODE2 OP0 OP1))
+      (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X1>))
+      (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X2>))
       (set TARGET (ior:PRED_MODE TMP1 TMP2))
 
-   PTRUE is an all-true predicate with the same mode as TARGET.  */
+   where <Xi> is the operation associated with comparison CODEi.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
-			   rtx ptrue, rtx op0, rtx op1)
+aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2,
+			      rtx pred, bool known_ptrue_p, rtx op0, rtx op1)
 {
-  machine_mode pred_mode = GET_MODE (ptrue);
+  machine_mode pred_mode = GET_MODE (pred);
   rtx tmp1 = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp1, ptrue,
-			     gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1);
   rtx tmp2 = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp2, ptrue,
-			     gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1);
   aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
 }
 
 /* Emit the SVE equivalent of:
 
-      (set TMP (CODE OP0 OP1))
+      (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
       (set TARGET (not TMP))
 
-   PTRUE is an all-true predicate with the same mode as TARGET.  */
+   where <X> is the operation associated with comparison CODE.
+   KNOWN_PTRUE_P is true if PRED is known to be a PTRUE.  */
 
 static void
-aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
-				rtx op0, rtx op1)
+aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred,
+				 bool known_ptrue_p, rtx op0, rtx op1)
 {
-  machine_mode pred_mode = GET_MODE (ptrue);
+  machine_mode pred_mode = GET_MODE (pred);
   rtx tmp = gen_reg_rtx (pred_mode);
-  aarch64_emit_sve_ptrue_op (tmp, ptrue,
-			     gen_rtx_fmt_ee (code, pred_mode, op0, op1));
+  aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1);
   aarch64_emit_unop (target, one_cmpl_optab, tmp);
 }
 
@@ -17854,14 +17843,13 @@  aarch64_expand_sve_vec_cmp_float (rtx ta
     case NE:
       {
 	/* There is native support for the comparison.  */
-	rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-	aarch64_emit_sve_ptrue_op (target, ptrue, cond);
+	aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
 	return false;
       }
 
     case LTGT:
       /* This is a trapping operation (LT or GT).  */
-      aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
+      aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1);
       return false;
 
     case UNEQ:
@@ -17869,7 +17857,8 @@  aarch64_expand_sve_vec_cmp_float (rtx ta
 	{
 	  /* This would trap for signaling NaNs.  */
 	  op1 = force_reg (data_mode, op1);
-	  aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
+	  aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ,
+					ptrue, true, op0, op1);
 	  return false;
 	}
       /* fall through */
@@ -17882,7 +17871,8 @@  aarch64_expand_sve_vec_cmp_float (rtx ta
 	  /* Work out which elements are ordered.  */
 	  rtx ordered = gen_reg_rtx (pred_mode);
 	  op1 = force_reg (data_mode, op1);
-	  aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
+	  aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED,
+					   ptrue, true, op0, op1);
 
 	  /* Test the opposite condition for the ordered elements,
 	     then invert the result.  */
@@ -17892,13 +17882,12 @@  aarch64_expand_sve_vec_cmp_float (rtx ta
 	    code = reverse_condition_maybe_unordered (code);
 	  if (can_invert_p)
 	    {
-	      aarch64_emit_sve_predicated_cond (target, code,
-						ordered, op0, op1);
+	      aarch64_emit_sve_fp_cond (target, code,
+					ordered, false, op0, op1);
 	      return true;
 	    }
-	  rtx tmp = gen_reg_rtx (pred_mode);
-	  aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
-	  aarch64_emit_unop (target, one_cmpl_optab, tmp);
+	  aarch64_emit_sve_invert_fp_cond (target, code,
+					   ordered, false, op0, op1);
 	  return false;
 	}
       break;
@@ -17916,11 +17905,10 @@  aarch64_expand_sve_vec_cmp_float (rtx ta
   code = reverse_condition_maybe_unordered (code);
   if (can_invert_p)
     {
-      rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
-      aarch64_emit_sve_ptrue_op (target, ptrue, cond);
+      aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
       return true;
     }
-  aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
+  aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1);
   return false;
 }
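
As a worked example of the trapping-math path above (a rough sketch
only; the exact rtl shapes are simplified): expanding
target = (unlt op0 op1) with can_invert_p false first computes the
ordered lanes, then tests the reversed condition (GE, from
reverse_condition_maybe_unordered) under that possibly-partial
predicate, and finally inverts the result:

    ordered = not (unspec [ptrue   SVE_KNOWN_PTRUE     op0 op1] UNSPEC_COND_FCMUO)
    tmp     =     (unspec [ordered SVE_MAYBE_NOT_PTRUE op0 op1] UNSPEC_COND_FCMGE)
    target  = not tmp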