===================================================================
@@ -140,7 +140,9 @@
(UNSPEC_VUZP1 201)
(UNSPEC_VUZP2 202)
(UNSPEC_VZIP1 203)
- (UNSPEC_VZIP2 204)])
+ (UNSPEC_VZIP2 204)
+ (UNSPEC_VCLE 206)
+ (UNSPEC_VCLT 207)])
;; Attribute used to permit string comparisons against <VQH_mnem> in
@@ -1452,6 +1454,169 @@
[(set_attr "neon_type" "neon_int_5")]
)
+;; Conditional instructions. These are comparisons with conditional moves for
+;; vectors. They perform the assignment:
+;;
+;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
+;;
+;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
+;; element-wise.
+
+(define_expand "vcond<mode>"
+ [(set (match_operand:VDQW 0 "s_register_operand" "")
+ (if_then_else:VDQW
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand:VDQW 4 "s_register_operand" "")
+ (match_operand:VDQW 5 "nonmemory_operand" "")])
+ (match_operand:VDQW 1 "s_register_operand" "")
+ (match_operand:VDQW 2 "s_register_operand" "")))]
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ rtx mask;
+ int inverse = 0, immediate_zero = 0;
+ /* See the description of "magic" bits in the 'T' case of
+ arm_print_operand. */
+ HOST_WIDE_INT magic_word = (<MODE>mode == V2SFmode || <MODE>mode == V4SFmode)
+ ? 3 : 1;
+ rtx magic_rtx = GEN_INT (magic_word);
+
+ mask = gen_reg_rtx (<V_cmp_result>mode);
+
+ if (operands[5] == CONST0_RTX (<MODE>mode))
+ immediate_zero = 1;
+ else if (!REG_P (operands[5]))
+ operands[5] = force_reg (<MODE>mode, operands[5]);
+
+ switch (GET_CODE (operands[3]))
+ {
+ case GE:
+ emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ break;
+
+ case GT:
+ emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ break;
+
+ case EQ:
+ emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ break;
+
+ case LE:
+ if (immediate_zero)
+ emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ else
+ emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
+ magic_rtx));
+ break;
+
+ case LT:
+ if (immediate_zero)
+ emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ else
+ emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
+ magic_rtx));
+ break;
+
+ case NE:
+ emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+ magic_rtx));
+ inverse = 1;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (inverse)
+ emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
+ operands[1]));
+ else
+ emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
+ operands[2]));
+
+ DONE;
+})
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand" "")
+ (if_then_else:VDQIW
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand:VDQIW 4 "s_register_operand" "")
+ (match_operand:VDQIW 5 "s_register_operand" "")])
+ (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:VDQIW 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+{
+ rtx mask;
+ int inverse = 0, immediate_zero = 0;
+
+ mask = gen_reg_rtx (<V_cmp_result>mode);
+
+ if (operands[5] == CONST0_RTX (<MODE>mode))
+ immediate_zero = 1;
+ else if (!REG_P (operands[5]))
+ operands[5] = force_reg (<MODE>mode, operands[5]);
+
+ switch (GET_CODE (operands[3]))
+ {
+ case GEU:
+ emit_insn (gen_neon_vcge<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ break;
+
+ case GTU:
+ emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ break;
+
+ case EQ:
+ emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ break;
+
+ case LEU:
+ if (immediate_zero)
+ emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ else
+ emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4],
+ const0_rtx));
+ break;
+
+ case LTU:
+ if (immediate_zero)
+ emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ else
+ emit_insn (gen_neon_vcgt<mode> (mask, operands[5], operands[4],
+ const0_rtx));
+ break;
+
+ case NE:
+ emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5],
+ const0_rtx));
+ inverse = 1;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (inverse)
+ emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
+ operands[1]));
+ else
+ emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
+ operands[2]));
+
+ DONE;
+})
+
;; Patterns for builtins.
; good for plain vadd, vaddq.
@@ -1863,13 +2028,16 @@
)
(define_insn "neon_vceq<mode>"
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
- (unspec:<V_cmp_result> [(match_operand:VDQW 1 "s_register_operand" "w")
- (match_operand:VDQW 2 "s_register_operand" "w")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VCEQ))]
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQW 1 "s_register_operand" "w,w")
+ (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+ (match_operand:SI 3 "immediate_operand" "i,i")]
+ UNSPEC_VCEQ))]
"TARGET_NEON"
- "vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ "@
+ vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
+ vceq.<V_if_elem>\t%<V_reg>0, %<V_reg>1, #0"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
(if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
@@ -1879,13 +2047,16 @@
)
(define_insn "neon_vcge<mode>"
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
- (unspec:<V_cmp_result> [(match_operand:VDQW 1 "s_register_operand" "w")
- (match_operand:VDQW 2 "s_register_operand" "w")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VCGE))]
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQW 1 "s_register_operand" "w,w")
+ (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+ (match_operand:SI 3 "immediate_operand" "i,i")]
+ UNSPEC_VCGE))]
"TARGET_NEON"
- "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ "@
+ vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
+ vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
(if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
@@ -1895,13 +2066,16 @@
)
(define_insn "neon_vcgt<mode>"
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
- (unspec:<V_cmp_result> [(match_operand:VDQW 1 "s_register_operand" "w")
- (match_operand:VDQW 2 "s_register_operand" "w")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VCGT))]
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQW 1 "s_register_operand" "w,w")
+ (match_operand:VDQW 2 "nonmemory_operand" "w,Dz")
+ (match_operand:SI 3 "immediate_operand" "i,i")]
+ UNSPEC_VCGT))]
"TARGET_NEON"
- "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ "@
+ vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
+ vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
(if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
@@ -1910,6 +2084,43 @@
(const_string "neon_int_5")))]
)
+;; VCLE and VCLT only support comparisons with immediate zero (register
+;; variants are VCGE and VCGT with operands reversed).
+
+(define_insn "neon_vcle<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "nonmemory_operand" "Dz")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VCLE))]
+ "TARGET_NEON"
+ "vcle.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
+(define_insn "neon_vclt<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQW 1 "s_register_operand" "w")
+ (match_operand:VDQW 2 "nonmemory_operand" "Dz")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VCLT))]
+ "TARGET_NEON"
+ "vclt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, #0"
+ [(set (attr "neon_type")
+ (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
+ (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0))
+ (const_string "neon_fp_vadd_ddd_vabs_dd")
+ (const_string "neon_fp_vadd_qqq_vabs_qq"))
+ (const_string "neon_int_5")))]
+)
+
(define_insn "neon_vcage<mode>"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
(unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
===================================================================
@@ -29,7 +29,7 @@
;; in Thumb-1 state: I, J, K, L, M, N, O
;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
;; in Thumb-1 state: Pa, Pb, Pc, Pd
;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px
@@ -199,6 +199,12 @@
(and (match_code "const_double")
(match_test "TARGET_32BIT && neg_const_double_rtx_ok_for_fpa (op)")))
+(define_constraint "Dz"
+ "@internal
+ In ARM/Thumb-2 state a vector of constant zeros."
+ (and (match_code "const_vector")
+ (match_test "TARGET_NEON && op == CONST0_RTX (mode)")))
+
(define_constraint "Da"
"@internal
In ARM/Thumb-2 state a const_int, const_double or const_vector that can
Hi, This patch implements vcond<mode> and vcondu<mode> for NEON, fixing the testsuite failure gcc.dg/vect/pr43430-1.c. These are RTX "standard names", but are unfortunately undocumented at present (see PR29269), so the intended semantics have been cargo-culted from other backends which implement the pattern. These vector comparisons provide a rather nice extension to the capabilities of the vectorizer. Also, the patterns for vcge, vcgt and vceq instructions have been extended to support the immediate variants (only comparisons with zero are supported), and vcle and vclt immediate patterns have been added. I haven't attempted to hook these up to the intrinsic-expansion mechanism, so those won't support the immediate-zero mode yet. (Note the gap in the numbering for the unspecs is intended to be filled by the NEON misalignment-support patch, when that is approved). Tested with cross to ARM Linux, using the options "-mfpu=neon -march=armv7-a -mfloat-abi=softfp" (gcc, g++ and libstdc++). The only change in test results is the transition of the test named above from FAIL to PASS. OK to apply? Thanks, Julian ChangeLog gcc/ * config/arm/neon.md (UNSPEC_VCLE, UNSPEC_VCLT): New constants for unspecs. (vcond<mode>, vcondu<mode>): New expanders. (neon_vceq<mode>, neon_vcge<mode>, neon_vcgt<mode>): Support comparisons with zero. (neon_vcle<mode>, neon_vclt<mode>): New patterns. * config/arm/constraints.md (Dz): New constraint.