===================================================================
@@ -1220,6 +1220,9 @@ (define_code_iterator SVE_INT_BINARY [pl
;; SVE integer binary division operations.
(define_code_iterator SVE_INT_BINARY_SD [div udiv])
+;; SVE floating-point operations with an unpredicated all-register form.
+(define_code_iterator SVE_UNPRED_FP_BINARY [plus minus mult])
+
;; SVE integer comparisons.
(define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
@@ -1423,6 +1426,8 @@ (define_code_attr sve_int_op_rev [(plus
;; The floating-point SVE instruction that implements an rtx code.
(define_code_attr sve_fp_op [(plus "fadd")
+ (minus "fsub")
+ (mult "fmul")
(neg "fneg")
(abs "fabs")
(sqrt "fsqrt")])
===================================================================
@@ -2194,7 +2194,7 @@ (define_expand "add<mode>3"
)
;; Floating-point addition predicated with a PTRUE.
-(define_insn "*add<mode>3"
+(define_insn_and_split "*add<mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
(unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
@@ -2206,7 +2206,12 @@ (define_insn "*add<mode>3"
"@
fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
- fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
)
;; Unpredicated floating-point subtraction.
@@ -2225,7 +2230,7 @@ (define_expand "sub<mode>3"
)
;; Floating-point subtraction predicated with a PTRUE.
-(define_insn "*sub<mode>3"
+(define_insn_and_split "*sub<mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
(unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
@@ -2240,7 +2245,13 @@ (define_insn "*sub<mode>3"
fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
- fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[2], <MODE>mode)
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
)
;; Unpredicated floating-point multiplication.
@@ -2259,7 +2270,7 @@ (define_expand "mul<mode>3"
)
;; Floating-point multiplication predicated with a PTRUE.
-(define_insn "*mul<mode>3"
+(define_insn_and_split "*mul<mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w, w")
(unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
@@ -2270,8 +2281,24 @@ (define_insn "*mul<mode>3"
"TARGET_SVE"
"@
fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
-)
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
+)
+
+;; Unpredicated floating-point binary operations (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+(define_insn "*post_ra_<sve_fp_op><mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (SVE_UNPRED_FP_BINARY:SVE_F
+ (match_operand:SVE_F 1 "register_operand" "w")
+ (match_operand:SVE_F 2 "register_operand" "w")))]
+ "TARGET_SVE && reload_completed"
+ "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
;; Unpredicated fma (%0 = (%1 * %2) + %3).
(define_expand "fma<mode>4"
===================================================================
@@ -0,0 +1,23 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#define TEST_OP(NAME, TYPE, OP) \
+ void \
+ NAME##_##TYPE (TYPE *restrict a, TYPE *restrict b, \
+ TYPE *restrict c, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ a[i] = b[i] OP c[i]; \
+ }
+
+#define TEST_TYPE(TYPE) \
+ TEST_OP (add, TYPE, +) \
+ TEST_OP (sub, TYPE, -) \
+ TEST_OP (mult, TYPE, *) \
+
+TEST_TYPE (float)
+TEST_TYPE (double)
+
+/* { dg-final { scan-assembler-times {\tfadd\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tfmul\t} 2 } } */
+/* { dg-final { scan-assembler-not {\tptrue\t} } } */