@@ -2256,13 +2256,13 @@
;; in *aarch64_simd_bsl<mode>_alt.
(define_insn "aarch64_simd_bsl<mode>_internal"
- [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
- (xor:VSDQ_I_DI
- (and:VSDQ_I_DI
- (xor:VSDQ_I_DI
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
+ (xor:VDQ_I
+ (and:VDQ_I
+ (xor:VDQ_I
(match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
- (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
- (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
+ (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
+ (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
(match_dup:<V_cmp_result> 3)
))]
"TARGET_SIMD"
@@ -2280,14 +2280,14 @@
;; permutations of commutative operations, we have to have a separate pattern.
(define_insn "*aarch64_simd_bsl<mode>_alt"
- [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
- (xor:VSDQ_I_DI
- (and:VSDQ_I_DI
- (xor:VSDQ_I_DI
- (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
- (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
- (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
- (match_dup:VSDQ_I_DI 2)))]
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
+ (xor:VDQ_I
+ (and:VDQ_I
+ (xor:VDQ_I
+ (match_operand:VDQ_I 3 "register_operand" "w,w,0")
+ (match_operand:VDQ_I 2 "register_operand" "w,0,w"))
+ (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
+ (match_dup:VDQ_I 2)))]
"TARGET_SIMD"
"@
bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
@@ -2296,6 +2296,44 @@
[(set_attr "type" "neon_bsl<q>")]
)
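
Because XOR is commutative, combine can present the final XOR paired
with either operand of the inner XOR, which is why this *_alt pattern
duplicates the one above with the final operand swapped.  The two C
spellings the patterns correspond to (a sketch, names illustrative):

    #include <stdint.h>

    uint64_t
    sel_internal (uint64_t a, uint64_t b, uint64_t c)
    {
      return ((a ^ b) & c) ^ a;  /* final XOR reuses the first operand  */
    }

    uint64_t
    sel_alt (uint64_t a, uint64_t b, uint64_t c)
    {
      return ((a ^ b) & c) ^ b;  /* final XOR reuses the second operand  */
    }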
+;; DImode is special: we want to avoid computing, in the vector
+;; registers, operations that are more naturally computed in general
+;; purpose registers.  If we did that, we would need to move all three
+;; operands from general purpose registers to vector registers, then
+;; move the result back again.  However, we don't want to make this
+;; pattern an UNSPEC, as we'd lose scope for optimizations based on
+;; the component operations of a BSL.
+;;
+;; That means we need a splitter back to the individual operations for
+;; the cases where they would be better calculated on the integer side.
+
+(define_insn_and_split "aarch64_simd_bsldi_internal"
+ [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
+ (xor:DI
+ (and:DI
+ (xor:DI
+ (match_operand:DI 3 "register_operand" "w,0,w,r")
+ (match_operand:DI 2 "register_operand" "w,w,0,r"))
+ (match_operand:DI 1 "register_operand" "0,w,w,r"))
+ (match_dup:DI 3)
+ ))]
+ "TARGET_SIMD"
+ "@
+ bsl\\t%0.8b, %2.8b, %3.8b
+ bit\\t%0.8b, %2.8b, %1.8b
+ bif\\t%0.8b, %3.8b, %1.8b
+ #"
+ "&& GP_REGNUM_P (REGNO (operands[0]))"
+ [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
+{
+  /* Split back to individual operations.  The split condition only
+     holds once register allocation has placed the destination in a
+     general-purpose register, and the early-clobber "&r" alternative
+     guarantees that operands[0] does not overlap the inputs, so it is
+     safe to use as the accumulator throughout.  */
+ emit_insn (gen_xordi3 (operands[0], operands[2], operands[3]));
+ emit_insn (gen_anddi3 (operands[0], operands[0], operands[1]));
+ emit_insn (gen_xordi3 (operands[0], operands[0], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "neon_bsl")]
+)
+
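When register allocation places the destination in a general-purpose
register, the splitter decomposes the BSL back into the scalar
EOR/AND/EOR sequence.  What the three emitted instructions compute, as
a C sketch (not part of the patch):

    #include <stdint.h>

    uint64_t
    split_sequence (uint64_t op1, uint64_t op2, uint64_t op3)
    {
      uint64_t r = op2 ^ op3;   /* gen_xordi3: eor  */
      r &= op1;                 /* gen_anddi3: and  */
      r ^= op3;                 /* gen_xordi3: eor  */
      return r;                 /* == ((op2 ^ op3) & op1) ^ op3  */
    }
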
(define_expand "aarch64_simd_bsl<mode>"
[(match_operand:VALLDIF 0 "register_operand")
(match_operand:<V_cmp_result> 1 "register_operand")
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Test that we can generate a DImode BSL when expanding
+   __builtin_copysign.  */
+
+double
+foo (double a, double b)
+{
+ return __builtin_copysign (a, b);
+}
+
+/* { dg-final { scan-assembler "bsl\tv\[0-9\]" } } */
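
Copysign is exactly such a single-bit select: the sign bit comes from b
and everything else from a, so keeping the operands in FP registers and
using BSL is profitable here.  A C sketch of the underlying bit
manipulation (assumes IEEE-754 binary64; the helper name is
illustrative):

    #include <string.h>
    #include <stdint.h>

    static double
    copysign_via_select (double a, double b)
    {
      uint64_t ia, ib, r;
      const uint64_t sign = 0x8000000000000000ULL;
      memcpy (&ia, &a, sizeof ia);
      memcpy (&ib, &b, sizeof ib);
      r = ((ia ^ ib) & sign) ^ ia;   /* sign bit from b, rest from a  */
      memcpy (&a, &r, sizeof a);
      return a;
    }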
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Test that we don't combine to BSL in DImode, so that we avoid
+   cross-register-file moves in the general case.
+
+ We want:
+ eor x0, x0, x1
+ and x0, x0, x2
+ eor x0, x0, x1
+ ret
+
+ Rather than:
+ fmov d2, x0
+ fmov d0, x2
+ fmov d1, x1
+ bsl v0.8b, v2.8b, v1.8b
+ fmov x0, d0
+ ret */
+
+long long
+foo (long long a, long long b, long long c)
+{
+ return ((a ^ b) & c) ^ b;
+}
+
+/* { dg-final { scan-assembler-not "bsl\tv\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "bif\tv\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "bit\tv\[0-9\]" } } */
+/* { dg-final { scan-assembler-not "fmov\td\[0-9\]" } } */