===================================================================
@@ -479,6 +479,7 @@ (define_c_enum "unspec"
UNSPEC_COND_FCMLE ; Used in aarch64-sve.md.
UNSPEC_COND_FCMLT ; Used in aarch64-sve.md.
UNSPEC_COND_FCMNE ; Used in aarch64-sve.md.
+ UNSPEC_COND_FCMUO ; Used in aarch64-sve.md.
UNSPEC_COND_FDIV ; Used in aarch64-sve.md.
UNSPEC_COND_FMAXNM ; Used in aarch64-sve.md.
UNSPEC_COND_FMINNM ; Used in aarch64-sve.md.
@@ -1273,9 +1274,6 @@ (define_code_iterator SVE_UNPRED_FP_BINA
;; SVE integer comparisons.
(define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
-;; SVE floating-point comparisons.
-(define_code_iterator SVE_FP_CMP [lt le eq ne ge gt])
-
;; -------------------------------------------------------------------
;; Code Attributes
;; -------------------------------------------------------------------
@@ -1663,12 +1661,13 @@ (define_int_iterator SVE_COND_FP_TERNARY
UNSPEC_COND_FNMLA
UNSPEC_COND_FNMLS])
-(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_FCMEQ
- UNSPEC_COND_FCMGE
- UNSPEC_COND_FCMGT
- UNSPEC_COND_FCMLE
- UNSPEC_COND_FCMLT
- UNSPEC_COND_FCMNE])
+;; SVE FP comparisons that accept #0.0 as their second comparison input.
+(define_int_iterator SVE_COND_FP_CMP_I0 [UNSPEC_COND_FCMEQ
+ UNSPEC_COND_FCMGE
+ UNSPEC_COND_FCMGT
+ UNSPEC_COND_FCMLE
+ UNSPEC_COND_FCMLT
+ UNSPEC_COND_FCMNE])
(define_int_iterator FCADD [UNSPEC_FCADD90
UNSPEC_FCADD270])
@@ -1955,7 +1954,8 @@ (define_int_attr cmp_op [(UNSPEC_COND_FC
(UNSPEC_COND_FCMGT "gt")
(UNSPEC_COND_FCMLE "le")
(UNSPEC_COND_FCMLT "lt")
- (UNSPEC_COND_FCMNE "ne")])
+ (UNSPEC_COND_FCMNE "ne")
+ (UNSPEC_COND_FCMUO "uo")])
(define_int_attr sve_int_op [(UNSPEC_ANDV "andv")
(UNSPEC_IORV "orv")
===================================================================
@@ -3136,15 +3136,15 @@ (define_expand "vec_cmp<mode><vpred>"
}
)
-;; Floating-point comparisons predicated with a PTRUE.
+;; Predicated floating-point comparisons.
(define_insn "*fcm<cmp_op><mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
(unspec:<VPRED>
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_FP_CMP:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
- UNSPEC_MERGE_PTRUE))]
+ (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+ SVE_COND_FP_CMP_I0))]
"TARGET_SVE"
"@
fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
@@ -3156,10 +3156,10 @@ (define_insn "*fcmuo<mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(unspec:<VPRED>
[(match_operand:<VPRED> 1 "register_operand" "Upl")
- (unordered:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w")
- (match_operand:SVE_F 3 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+ (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
+ UNSPEC_COND_FCMUO))]
"TARGET_SVE"
"fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
@@ -3177,20 +3177,21 @@ (define_insn_and_split "*fcm<cmp_op><mod
(and:<VPRED>
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
- (SVE_FP_CMP
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
- UNSPEC_MERGE_PTRUE)
+ (const_int SVE_KNOWN_PTRUE)
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+ SVE_COND_FP_CMP_I0)
(match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
"TARGET_SVE"
"#"
"&& 1"
[(set (match_dup 0)
- (and:<VPRED>
- (SVE_FP_CMP:<VPRED>
- (match_dup 2)
- (match_dup 3))
- (match_dup 4)))]
+ (unspec:<VPRED>
+ [(match_dup 4)
+ (const_int SVE_MAYBE_NOT_PTRUE)
+ (match_dup 2)
+ (match_dup 3)]
+ SVE_COND_FP_CMP_I0))]
)
;; Same for unordered comparisons.
@@ -3199,62 +3200,21 @@ (define_insn_and_split "*fcmuo<mode>_and
(and:<VPRED>
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
- (unordered
- (match_operand:SVE_F 2 "register_operand" "w")
- (match_operand:SVE_F 3 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE)
+ (const_int SVE_KNOWN_PTRUE)
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
+ UNSPEC_COND_FCMUO)
(match_operand:<VPRED> 4 "register_operand" "Upl")))]
"TARGET_SVE"
"#"
"&& 1"
[(set (match_dup 0)
- (and:<VPRED>
- (unordered:<VPRED>
- (match_dup 2)
- (match_dup 3))
- (match_dup 4)))]
-)
-
-;; Unpredicated floating-point comparisons, with the results ANDed with
-;; another predicate. This is a valid fold for the same reasons as above.
-(define_insn "*fcm<cmp_op><mode>_and"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (SVE_FP_CMP:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
- (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
- "TARGET_SVE"
- "@
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Same for unordered comparisons.
-(define_insn "*fcmuo<mode>_and"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
- (and:<VPRED>
- (unordered:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w")
- (match_operand:SVE_F 3 "register_operand" "w"))
- (match_operand:<VPRED> 1 "register_operand" "Upl")))]
- "TARGET_SVE"
- "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Predicated floating-point comparisons. We don't need a version
-;; of this for unordered comparisons.
-(define_insn "*pred_fcm<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
(unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
- SVE_COND_FP_CMP))]
- "TARGET_SVE"
- "@
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ [(match_dup 4)
+ (const_int SVE_MAYBE_NOT_PTRUE)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_COND_FCMUO))]
)
;; -------------------------------------------------------------------------
===================================================================
@@ -17700,28 +17700,35 @@ aarch64_sve_cmp_operand_p (rtx_code op_c
(set TARGET OP)
- given that PTRUE is an all-true predicate of the appropriate mode. */
+ given that PTRUE is an all-true predicate of the appropriate mode
+ and that the instruction clobbers the condition codes. */
static void
-aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
+aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
{
rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
gen_rtvec (2, ptrue, op),
UNSPEC_MERGE_PTRUE);
- rtx_insn *insn = emit_set_insn (target, unspec);
+ rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec));
set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
}
-/* Likewise, but also clobber the condition codes. */
+/* Expand an SVE integer comparison using the SVE equivalent of:
-static void
-aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
+ (set TARGET (CODE OP0 OP1)). */
+
+void
+aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
{
- rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
- gen_rtvec (2, ptrue, op),
- UNSPEC_MERGE_PTRUE);
- rtx_insn *insn = emit_insn (gen_set_clobber_cc_nzc (target, unspec));
- set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
+ machine_mode pred_mode = GET_MODE (target);
+ machine_mode data_mode = GET_MODE (op0);
+
+ if (!aarch64_sve_cmp_operand_p (code, op1))
+ op1 = force_reg (data_mode, op1);
+
+ rtx ptrue = aarch64_ptrue_reg (pred_mode);
+ rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
+ aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
}
/* Return the UNSPEC_COND_* code for comparison CODE. */
@@ -17743,6 +17750,8 @@ aarch64_unspec_cond_code (rtx_code code)
return UNSPEC_COND_FCMLE;
case GE:
return UNSPEC_COND_FCMGE;
+ case UNORDERED:
+ return UNSPEC_COND_FCMUO;
default:
gcc_unreachable ();
}
@@ -17750,78 +17759,58 @@ aarch64_unspec_cond_code (rtx_code code)
/* Emit:
- (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
+ (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
- where <X> is the operation associated with comparison CODE. This form
- of instruction is used when (and (CODE OP0 OP1) PRED) would have different
- semantics, such as when PRED might not be all-true and when comparing
- inactive lanes could have side effects. */
+ where <X> is the operation associated with comparison CODE and the unspec
+ has PRED's mode. KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
static void
-aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
- rtx pred, rtx op0, rtx op1)
+aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred,
+ bool known_ptrue_p, rtx op0, rtx op1)
{
+ rtx flag = gen_int_mode (known_ptrue_p, SImode);
rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
- gen_rtvec (3, pred, op0, op1),
+ gen_rtvec (4, pred, flag, op0, op1),
aarch64_unspec_cond_code (code));
emit_set_insn (target, unspec);
}
-/* Expand an SVE integer comparison using the SVE equivalent of:
-
- (set TARGET (CODE OP0 OP1)). */
-
-void
-aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
-{
- machine_mode pred_mode = GET_MODE (target);
- machine_mode data_mode = GET_MODE (op0);
-
- if (!aarch64_sve_cmp_operand_p (code, op1))
- op1 = force_reg (data_mode, op1);
-
- rtx ptrue = aarch64_ptrue_reg (pred_mode);
- rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
- aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
-}
-
/* Emit the SVE equivalent of:
- (set TMP1 (CODE1 OP0 OP1))
- (set TMP2 (CODE2 OP0 OP1))
+ (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X1>))
+ (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X2>))
(set TARGET (ior:PRED_MODE TMP1 TMP2))
- PTRUE is an all-true predicate with the same mode as TARGET. */
+ where <Xi> is the operation associated with comparison CODEi and PRED_MODE
+ is the mode of PRED. KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */
static void
-aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
- rtx ptrue, rtx op0, rtx op1)
+aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2,
+ rtx pred, bool known_ptrue_p, rtx op0, rtx op1)
{
- machine_mode pred_mode = GET_MODE (ptrue);
+ machine_mode pred_mode = GET_MODE (pred);
rtx tmp1 = gen_reg_rtx (pred_mode);
- aarch64_emit_sve_ptrue_op (tmp1, ptrue,
- gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
+ aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1);
rtx tmp2 = gen_reg_rtx (pred_mode);
- aarch64_emit_sve_ptrue_op (tmp2, ptrue,
- gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
+ aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1);
aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
}
/* Emit the SVE equivalent of:
- (set TMP (CODE OP0 OP1))
+ (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_<X>))
(set TARGET (not TMP))
- PTRUE is an all-true predicate with the same mode as TARGET. */
+ where <X> is the operation associated with comparison CODE and TMP is a
+ new register in PRED's mode. KNOWN_PTRUE_P is true if PRED is a known PTRUE. */
static void
-aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
- rtx op0, rtx op1)
+aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred,
+ bool known_ptrue_p, rtx op0, rtx op1)
{
- machine_mode pred_mode = GET_MODE (ptrue);
+ machine_mode pred_mode = GET_MODE (pred);
rtx tmp = gen_reg_rtx (pred_mode);
- aarch64_emit_sve_ptrue_op (tmp, ptrue,
- gen_rtx_fmt_ee (code, pred_mode, op0, op1));
+ aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1);
aarch64_emit_unop (target, one_cmpl_optab, tmp);
}
@@ -17854,14 +17843,13 @@ aarch64_expand_sve_vec_cmp_float (rtx ta
case NE:
{
/* There is native support for the comparison. */
- rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
- aarch64_emit_sve_ptrue_op (target, ptrue, cond);
+ aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
return false;
}
case LTGT:
/* This is a trapping operation (LT or GT). */
- aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
+ aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1);
return false;
case UNEQ:
@@ -17869,7 +17857,8 @@ aarch64_expand_sve_vec_cmp_float (rtx ta
{
/* This would trap for signaling NaNs. */
op1 = force_reg (data_mode, op1);
- aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
+ aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ,
+ ptrue, true, op0, op1);
return false;
}
/* fall through */
@@ -17882,7 +17871,8 @@ aarch64_expand_sve_vec_cmp_float (rtx ta
/* Work out which elements are ordered. */
rtx ordered = gen_reg_rtx (pred_mode);
op1 = force_reg (data_mode, op1);
- aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
+ aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED,
+ ptrue, true, op0, op1);
/* Test the opposite condition for the ordered elements,
then invert the result. */
@@ -17892,13 +17882,12 @@ aarch64_expand_sve_vec_cmp_float (rtx ta
code = reverse_condition_maybe_unordered (code);
if (can_invert_p)
{
- aarch64_emit_sve_predicated_cond (target, code,
- ordered, op0, op1);
+ aarch64_emit_sve_fp_cond (target, code,
+ ordered, false, op0, op1);
return true;
}
- rtx tmp = gen_reg_rtx (pred_mode);
- aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
- aarch64_emit_unop (target, one_cmpl_optab, tmp);
+ aarch64_emit_sve_invert_fp_cond (target, code,
+ ordered, false, op0, op1);
return false;
}
break;
@@ -17916,11 +17905,10 @@ aarch64_expand_sve_vec_cmp_float (rtx ta
code = reverse_condition_maybe_unordered (code);
if (can_invert_p)
{
- rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
- aarch64_emit_sve_ptrue_op (target, ptrue, cond);
+ aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
return true;
}
- aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
+ aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1);
return false;
}