===================================================================
@@ -18,8 +18,120 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-;; Note on the handling of big-endian SVE
-;; --------------------------------------
+;; The file is organised into the following sections (search for the full
+;; line):
+;;
+;; == General notes
+;; ---- Note on the handling of big-endian SVE
+;;
+;; == Moves
+;; ---- Moves of single vectors
+;; ---- Moves of multiple vectors
+;; ---- Moves of predicates
+;;
+;; == Loads
+;; ---- Normal contiguous loads
+;; ---- Normal gather loads
+;;
+;; == Stores
+;; ---- Normal contiguous stores
+;; ---- Normal scatter stores
+;;
+;; == Vector creation
+;; ---- [INT,FP] Duplicate element
+;; ---- [INT,FP] Initialize from individual elements
+;; ---- [INT] Linear series
+;; ---- [PRED] Duplicate element
+;;
+;; == Vector decomposition
+;; ---- [INT,FP] Extract index
+;; ---- [INT,FP] Extract active element
+;; ---- [PRED] Extract index
+;;
+;; == Unary arithmetic
+;; ---- [INT] General unary arithmetic corresponding to rtx codes
+;; ---- [FP] General unary arithmetic corresponding to rtx codes
+;; ---- [FP] Rounding
+;; ---- [PRED] Inverse
+;;
+;; == Binary arithmetic
+;; ---- [INT] General binary arithmetic corresponding to rtx codes
+;; ---- [INT] Addition
+;; ---- [INT] Subtraction
+;; ---- [INT] Absolute difference
+;; ---- [INT] Multiplication
+;; ---- [INT] Highpart multiplication
+;; ---- [INT] Division
+;; ---- [INT] Binary logical operations
+;; ---- [INT] Binary logical operations (inverted second input)
+;; ---- [INT] Shifts
+;; ---- [INT] Maximum and minimum
+;; ---- [FP] General binary arithmetic corresponding to rtx codes
+;; ---- [FP] General binary arithmetic corresponding to unspecs
+;; ---- [FP] Addition
+;; ---- [FP] Subtraction
+;; ---- [FP] Absolute difference
+;; ---- [FP] Multiplication
+;; ---- [FP] Division
+;; ---- [FP] Binary logical operations
+;; ---- [FP] Sign copying
+;; ---- [FP] Maximum and minimum
+;; ---- [PRED] Binary logical operations
+;; ---- [PRED] Binary logical operations (inverted second input)
+;; ---- [PRED] Binary logical operations (inverted result)
+;;
+;; == Ternary arithmetic
+;; ---- [INT] MLA and MAD
+;; ---- [INT] MLS and MSB
+;; ---- [INT] Dot product
+;; ---- [INT] Sum of absolute differences
+;; ---- [FP] General ternary arithmetic corresponding to unspecs
+;; ---- [FP] FMLA and FMAD
+;; ---- [FP] FMLS and FMSB
+;; ---- [FP] FNMLA and FNMAD
+;; ---- [FP] FNMLS and FNMSB
+;;
+;; == Comparisons and selects
+;; ---- [INT,FP] Select based on predicates
+;; ---- [INT,FP] Compare and select
+;; ---- [INT] Comparisons
+;; ---- [INT] While tests
+;; ---- [FP] Comparisons
+;; ---- [PRED] Test bits
+;;
+;; == Reductions
+;; ---- [INT,FP] Conditional reductions
+;; ---- [INT] Tree reductions
+;; ---- [FP] Tree reductions
+;; ---- [FP] Left-to-right reductions
+;;
+;; == Permutes
+;; ---- [INT,FP] General permutes
+;; ---- [INT,FP] Special-purpose unary permutes
+;; ---- [INT,FP] Special-purpose binary permutes
+;; ---- [PRED] Special-purpose binary permutes
+;;
+;; == Conversions
+;; ---- [INT<-INT] Packs
+;; ---- [INT<-INT] Unpacks
+;; ---- [INT<-FP] Conversions
+;; ---- [INT<-FP] Packs
+;; ---- [INT<-FP] Unpacks
+;; ---- [FP<-INT] Conversions
+;; ---- [FP<-INT] Packs
+;; ---- [FP<-INT] Unpacks
+;; ---- [FP<-FP] Packs
+;; ---- [FP<-FP] Unpacks
+;; ---- [PRED<-PRED] Packs
+;; ---- [PRED<-PRED] Unpacks
+
+;; =========================================================================
+;; == General notes
+;; =========================================================================
+;;
+;; -------------------------------------------------------------------------
+;; ---- Note on the handling of big-endian SVE
+;; -------------------------------------------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
@@ -61,7 +173,27 @@
;; reserve a predicate register.
-;; SVE data moves.
+;; =========================================================================
+;; == Moves
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Moves of single vectors
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOV (including aliases)
+;; - LD1B (contiguous form)
+;; - LD1D ( " " )
+;; - LD1H ( " " )
+;; - LD1W ( " " )
+;; - LDR
+;; - ST1B (contiguous form)
+;; - ST1D ( " " )
+;; - ST1H ( " " )
+;; - ST1W ( " " )
+;; - STR
+;; -------------------------------------------------------------------------
+
(define_expand "mov<mode>"
[(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
(match_operand:SVE_ALL 1 "general_operand"))]
@@ -93,22 +225,13 @@ (define_expand "mov<mode>"
}
)
-;; A pattern for optimizing SUBREGs that have a reinterpreting effect
-;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
-;; for details. We use a special predicate for operand 2 to reduce
-;; the number of patterns.
-(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
- [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:VNx16BI 1 "register_operand" "Upl")
- (match_operand 2 "aarch64_any_register_operand" "w")]
- UNSPEC_REV_SUBREG))]
- "TARGET_SVE && BYTES_BIG_ENDIAN"
- "#"
- "&& reload_completed"
- [(const_int 0)]
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
+ (match_operand:SVE_ALL 1 "general_operand"))]
+ "TARGET_SVE"
{
- aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
+  /* Equivalent to a normal move for our purposes.  */
+ emit_move_insn (operands[0], operands[1]);
DONE;
}
)
@@ -167,10 +290,9 @@ (define_expand "aarch64_sve_reload_be"
}
)
-;; A predicated load or store for which the predicate is known to be
-;; all-true. Note that this pattern is generated directly by
-;; aarch64_emit_sve_pred_move, so changes to this pattern will
-;; need changes there as well.
+;; A predicated move in which the predicate is known to be all-true.
+;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
+;; so changes to this pattern will need changes there as well.
(define_insn_and_split "@aarch64_pred_mov<mode>"
[(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
(unspec:SVE_ALL
@@ -189,17 +311,193 @@ (define_insn_and_split "@aarch64_pred_mo
[(set (match_dup 0) (match_dup 2))]
)
-(define_expand "movmisalign<mode>"
- [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
- (match_operand:SVE_ALL 1 "general_operand"))]
+;; A pattern for optimizing SUBREGs that have a reinterpreting effect
+;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
+;; for details. We use a special predicate for operand 2 to reduce
+;; the number of patterns.
+(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
+ [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:VNx16BI 1 "register_operand" "Upl")
+ (match_operand 2 "aarch64_any_register_operand" "w")]
+ UNSPEC_REV_SUBREG))]
+ "TARGET_SVE && BYTES_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
+ DONE;
+ }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Moves of multiple vectors
+;; -------------------------------------------------------------------------
+;; All patterns in this section are synthetic and split to real
+;; instructions after reload.
+;; -------------------------------------------------------------------------
+
+(define_expand "mov<mode>"
+ [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
+ (match_operand:SVE_STRUCT 1 "general_operand"))]
"TARGET_SVE"
{
- /* Equivalent to a normal move for our purpooses. */
- emit_move_insn (operands[0], operands[1]);
+ /* Big-endian loads and stores need to be done via LD1 and ST1;
+ see the comment at the head of the file for details. */
+ if ((MEM_P (operands[0]) || MEM_P (operands[1]))
+ && BYTES_BIG_ENDIAN)
+ {
+ gcc_assert (can_create_pseudo_p ());
+ aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
+ DONE;
+ }
+
+ if (CONSTANT_P (operands[1]))
+ {
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
+ DONE;
+ }
+ }
+)
+
+;; Unpredicated structure moves (little-endian).
+(define_insn "*aarch64_sve_mov<mode>_le"
+ [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
+ (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
+ "TARGET_SVE && !BYTES_BIG_ENDIAN"
+ "#"
+ [(set_attr "length" "<insn_length>")]
+)
+
+;; Unpredicated structure moves (big-endian). Memory accesses require
+;; secondary reloads.
+(define_insn "*aarch64_sve_mov<mode>_be"
+ [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
+ (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
+ "TARGET_SVE && BYTES_BIG_ENDIAN"
+ "#"
+ [(set_attr "length" "<insn_length>")]
+)
+
+;; Split unpredicated structure moves into pieces. This is the same
+;; for both big-endian and little-endian code, although it only needs
+;; to handle memory operands for little-endian code.
+(define_split
+ [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
+ (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
+ "TARGET_SVE && reload_completed"
+ [(const_int 0)]
+ {
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ if (REG_P (dest) && REG_P (src))
+ aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
+ else
+ for (unsigned int i = 0; i < <vector_count>; ++i)
+ {
+ rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
+ i * BYTES_PER_SVE_VECTOR);
+ rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
+ i * BYTES_PER_SVE_VECTOR);
+ emit_insn (gen_rtx_SET (subdest, subsrc));
+ }
DONE;
}
)
+;; Predicated structure moves. This works for both endiannesses but in
+;; practice is only useful for big-endian.
+(define_insn_and_split "@aarch64_pred_mov<mode>"
+ [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
+ (unspec:SVE_STRUCT
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[2], <MODE>mode))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ {
+ for (unsigned int i = 0; i < <vector_count>; ++i)
+ {
+ rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
+ <MODE>mode,
+ i * BYTES_PER_SVE_VECTOR);
+ rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
+ <MODE>mode,
+ i * BYTES_PER_SVE_VECTOR);
+ aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
+ }
+ DONE;
+ }
+ [(set_attr "length" "<insn_length>")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Moves of predicates
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOV
+;; - LDR
+;; - PFALSE
+;; - PTRUE
+;; - STR
+;; -------------------------------------------------------------------------
+
+(define_expand "mov<mode>"
+ [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
+ (match_operand:PRED_ALL 1 "general_operand"))]
+ "TARGET_SVE"
+ {
+ if (GET_CODE (operands[0]) == MEM)
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ }
+)
+
+(define_insn "*aarch64_sve_mov<mode>"
+ [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
+ (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
+ "TARGET_SVE
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+ "@
+ mov\t%0.b, %1.b
+ str\t%1, %0
+ ldr\t%0, %1
+ pfalse\t%0.b
+ * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
+)
+
+;; =========================================================================
+;; == Loads
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Normal contiguous loads
+;; -------------------------------------------------------------------------
+;; Includes contiguous forms of:
+;; - LD1B
+;; - LD1D
+;; - LD1H
+;; - LD1W
+;; - LD2B
+;; - LD2D
+;; - LD2H
+;; - LD2W
+;; - LD3B
+;; - LD3D
+;; - LD3H
+;; - LD3W
+;; - LD4B
+;; - LD4D
+;; - LD4H
+;; - LD4W
+;; -------------------------------------------------------------------------
+
+;; Predicated LD1.
(define_insn "maskload<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "register_operand" "=w")
(unspec:SVE_ALL
@@ -210,16 +508,38 @@ (define_insn "maskload<mode><vpred>"
"ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
)
-(define_insn "maskstore<mode><vpred>"
- [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
- (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (match_dup 0)]
- UNSPEC_ST1_SVE))]
+;; Unpredicated LD[234].
+(define_expand "vec_load_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "register_operand")
+ (unspec:SVE_STRUCT
+ [(match_dup 2)
+ (match_operand:SVE_STRUCT 1 "memory_operand")]
+ UNSPEC_LDN))]
"TARGET_SVE"
- "st1<Vesize>\t%1.<Vetype>, %2, %0"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; Predicated LD[234].
+(define_insn "vec_mask_load_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
+ (unspec:SVE_STRUCT
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
+ UNSPEC_LDN))]
+ "TARGET_SVE"
+ "ld<vector_count><Vesize>\t%0, %2/z, %1"
)
+;; -------------------------------------------------------------------------
+;; ---- Normal gather loads
+;; -------------------------------------------------------------------------
+;; Includes gather forms of:
+;; - LD1D
+;; - LD1W
+;; -------------------------------------------------------------------------
+
;; Unpredicated gather loads.
(define_expand "gather_load<mode>"
[(set (match_operand:SVE_SD 0 "register_operand")
@@ -277,7 +597,82 @@ (define_insn "mask_gather_load<mode>"
ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)
-;; Unpredicated scatter store.
+;; =========================================================================
+;; == Stores
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Normal contiguous stores
+;; -------------------------------------------------------------------------
+;; Includes contiguous forms of:
+;; - ST1B
+;; - ST1D
+;; - ST1H
+;; - ST1W
+;; - ST2B
+;; - ST2D
+;; - ST2H
+;; - ST2W
+;; - ST3B
+;; - ST3D
+;; - ST3H
+;; - ST3W
+;; - ST4B
+;; - ST4D
+;; - ST4H
+;; - ST4W
+;; -------------------------------------------------------------------------
+
+;; Predicated ST1.
+(define_insn "maskstore<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
+ (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_dup 0)]
+ UNSPEC_ST1_SVE))]
+ "TARGET_SVE"
+ "st1<Vesize>\t%1.<Vetype>, %2, %0"
+)
+
+;; Unpredicated ST[234]. This is always a full update, so the dependence
+;; on the old value of the memory location (via (match_dup 0)) is redundant.
+;; There doesn't seem to be any obvious benefit to treating the all-true
+;; case differently though. In particular, it's very unlikely that we'll
+;; only find out during RTL that a store_lanes is dead.
+(define_expand "vec_store_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "memory_operand")
+ (unspec:SVE_STRUCT
+ [(match_dup 2)
+ (match_operand:SVE_STRUCT 1 "register_operand")
+ (match_dup 0)]
+ UNSPEC_STN))]
+ "TARGET_SVE"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; Predicated ST[234].
+(define_insn "vec_mask_store_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
+ (unspec:SVE_STRUCT
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_STRUCT 1 "register_operand" "w")
+ (match_dup 0)]
+ UNSPEC_STN))]
+ "TARGET_SVE"
+ "st<vector_count><Vesize>\t%1, %2, %0"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Normal scatter stores
+;; -------------------------------------------------------------------------
+;; Includes scatter forms of:
+;; - ST1D
+;; - ST1W
+;; -------------------------------------------------------------------------
+
+;; Unpredicated scatter stores.
(define_expand "scatter_store<mode>"
[(set (mem:BLK (scratch))
(unspec:BLK
@@ -334,148 +729,199 @@ (define_insn "mask_scatter_store<mode>"
st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
)
-;; SVE structure moves.
-(define_expand "mov<mode>"
- [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
- (match_operand:SVE_STRUCT 1 "general_operand"))]
+;; =========================================================================
+;; == Vector creation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Duplicate element
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOV
+;; - LD1RB
+;; - LD1RD
+;; - LD1RH
+;; - LD1RW
+;; - LD1RQB
+;; - LD1RQD
+;; - LD1RQH
+;; - LD1RQW
+;; -------------------------------------------------------------------------
+
+(define_expand "vec_duplicate<mode>"
+ [(parallel
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (vec_duplicate:SVE_ALL
+ (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
+ (clobber (scratch:<VPRED>))])]
"TARGET_SVE"
{
- /* Big-endian loads and stores need to be done via LD1 and ST1;
- see the comment at the head of the file for details. */
- if ((MEM_P (operands[0]) || MEM_P (operands[1]))
- && BYTES_BIG_ENDIAN)
+ if (MEM_P (operands[1]))
{
- gcc_assert (can_create_pseudo_p ());
- aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
+ rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
+ emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
+ CONST0_RTX (<MODE>mode)));
DONE;
}
+ }
+)
- if (CONSTANT_P (operands[1]))
- {
- aarch64_expand_mov_immediate (operands[0], operands[1]);
- DONE;
- }
+;; Accept memory operands for the benefit of combine, and also in case
+;; the scalar input gets spilled to memory during RA. We want to split
+;; the load at the first opportunity in order to allow the PTRUE to be
+;; optimized with surrounding code.
+(define_insn_and_split "*vec_duplicate<mode>_reg"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
+ (vec_duplicate:SVE_ALL
+ (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
+ (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
+ "TARGET_SVE"
+ "@
+ mov\t%0.<Vetype>, %<vwcore>1
+ mov\t%0.<Vetype>, %<Vetype>1
+ #"
+ "&& MEM_P (operands[1])"
+ [(const_int 0)]
+ {
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (<VPRED>mode);
+ emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
+ emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
+ CONST0_RTX (<MODE>mode)));
+ DONE;
}
+ [(set_attr "length" "4,4,8")]
)
-;; Unpredicated structure moves (little-endian).
-(define_insn "*aarch64_sve_mov<mode>_le"
- [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
- (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
- "TARGET_SVE && !BYTES_BIG_ENDIAN"
- "#"
- [(set_attr "length" "<insn_length>")]
+;; This is used for vec_duplicate<mode>s from memory, but can also
+;; be used by combine to optimize selects of a vec_duplicate<mode>
+;; with zero.
+(define_insn "sve_ld1r<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (vec_duplicate:SVE_ALL
+ (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
+ (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
)
-;; Unpredicated structure moves (big-endian). Memory accesses require
-;; secondary reloads.
-(define_insn "*aarch64_sve_mov<mode>_le"
- [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
- (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
- "TARGET_SVE && BYTES_BIG_ENDIAN"
- "#"
- [(set_attr "length" "<insn_length>")]
+;; Load 128 bits from memory and duplicate to fill a vector. Since there
+;; are so few operations on 128-bit "elements", we don't define a VNx1TI
+;; and simply use vectors of bytes instead.
+(define_insn "*sve_ld1rq<Vesize>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
+ UNSPEC_LD1RQ))]
+ "TARGET_SVE"
+ "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
)
-;; Split unpredicated structure moves into pieces. This is the same
-;; for both big-endian and little-endian code, although it only needs
-;; to handle memory operands for little-endian code.
-(define_split
- [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
- (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
- "TARGET_SVE && reload_completed"
- [(const_int 0)]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Initialize from individual elements
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - INSR
+;; -------------------------------------------------------------------------
+
+(define_expand "vec_init<mode><Vel>"
+ [(match_operand:SVE_ALL 0 "register_operand")
+ (match_operand 1 "")]
+ "TARGET_SVE"
{
- rtx dest = operands[0];
- rtx src = operands[1];
- if (REG_P (dest) && REG_P (src))
- aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
- else
- for (unsigned int i = 0; i < <vector_count>; ++i)
- {
- rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
- i * BYTES_PER_SVE_VECTOR);
- rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
- i * BYTES_PER_SVE_VECTOR);
- emit_insn (gen_rtx_SET (subdest, subsrc));
- }
+ aarch64_sve_expand_vector_init (operands[0], operands[1]);
DONE;
}
)
-;; Predicated structure moves. This works for both endiannesses but in
-;; practice is only useful for big-endian.
-(define_insn_and_split "@aarch64_pred_mov<mode>"
- [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
- (unspec:SVE_STRUCT
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE
- && (register_operand (operands[0], <MODE>mode)
- || register_operand (operands[2], <MODE>mode))"
- "#"
- "&& reload_completed"
- [(const_int 0)]
- {
- for (unsigned int i = 0; i < <vector_count>; ++i)
- {
- rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
- <MODE>mode,
- i * BYTES_PER_SVE_VECTOR);
- rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
- <MODE>mode,
- i * BYTES_PER_SVE_VECTOR);
- aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
- }
- DONE;
- }
- [(set_attr "length" "<insn_length>")]
+;; Shift an SVE vector left and insert a scalar into element 0.
+(define_insn "vec_shl_insert_<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
+ (unspec:SVE_ALL
+ [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
+ (match_operand:<VEL> 2 "register_operand" "rZ, w")]
+ UNSPEC_INSR))]
+ "TARGET_SVE"
+ "@
+ insr\t%0.<Vetype>, %<vwcore>2
+ insr\t%0.<Vetype>, %<Vetype>2"
)
-(define_expand "mov<mode>"
- [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
- (match_operand:PRED_ALL 1 "general_operand"))]
+;; -------------------------------------------------------------------------
+;; ---- [INT] Linear series
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - INDEX
+;; -------------------------------------------------------------------------
+
+(define_insn "vec_series<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
+ (vec_series:SVE_I
+ (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
+ (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
"TARGET_SVE"
+ "@
+ index\t%0.<Vetype>, #%1, %<vw>2
+ index\t%0.<Vetype>, %<vw>1, #%2
+ index\t%0.<Vetype>, %<vw>1, %<vw>2"
+)
+
+;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
+;; of an INDEX instruction.
+(define_insn "*vec_series<mode>_plus"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (plus:SVE_I
+ (vec_duplicate:SVE_I
+ (match_operand:<VEL> 1 "register_operand" "r"))
+ (match_operand:SVE_I 2 "immediate_operand")))]
+ "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
{
- if (GET_CODE (operands[0]) == MEM)
- operands[1] = force_reg (<MODE>mode, operands[1]);
+ operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
+ return "index\t%0.<Vetype>, %<vw>1, #%2";
}
)
-(define_insn "*aarch64_sve_mov<mode>"
- [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
- (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
- "TARGET_SVE
- && (register_operand (operands[0], <MODE>mode)
- || register_operand (operands[1], <MODE>mode))"
- "@
- mov\t%0.b, %1.b
- str\t%1, %0
- ldr\t%0, %1
- pfalse\t%0.b
- * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
-)
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Duplicate element
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
-;; Handle extractions from a predicate by converting to an integer vector
-;; and extracting from there.
-(define_expand "vec_extract<vpred><Vel>"
- [(match_operand:<VEL> 0 "register_operand")
- (match_operand:<VPRED> 1 "register_operand")
- (match_operand:SI 2 "nonmemory_operand")
- ;; Dummy operand to which we can attach the iterator.
- (reg:SVE_I V0_REGNUM)]
+;; Implement a predicate broadcast by shifting the low bit of the scalar
+;; input into the top bit and using a WHILELO. An alternative would be to
+;; duplicate the input and do a compare with zero.
+(define_expand "vec_duplicate<mode>"
+ [(set (match_operand:PRED_ALL 0 "register_operand")
+ (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
"TARGET_SVE"
{
- rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
- CONST1_RTX (<MODE>mode),
- CONST0_RTX (<MODE>mode)));
- emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
+ rtx tmp = gen_reg_rtx (DImode);
+ rtx op1 = gen_lowpart (DImode, operands[1]);
+ emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
+ emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
DONE;
}
)
+;; =========================================================================
+;; == Vector decomposition
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Extract index
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - DUP (Advanced SIMD)
+;; - DUP (SVE)
+;; - EXT (SVE)
+;; - ST1 (Advanced SIMD)
+;; - UMOV (Advanced SIMD)
+;; -------------------------------------------------------------------------
+
(define_expand "vec_extract<mode><Vel>"
[(set (match_operand:<VEL> 0 "register_operand")
(vec_select:<VEL>
@@ -606,6 +1052,13 @@ (define_insn "*vec_extract<mode><Vel>_ex
}
)
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Extract active element
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LASTB
+;; -------------------------------------------------------------------------
+
;; Extract the last active element of operand 1 into operand 0.
;; If no elements are active, extract the last inactive element instead.
(define_insn "extract_last_<mode>"
@@ -620,284 +1073,284 @@ (define_insn "extract_last_<mode>"
lastb\t%<Vetype>0, %1, %2.<Vetype>"
)
-(define_expand "vec_duplicate<mode>"
- [(parallel
- [(set (match_operand:SVE_ALL 0 "register_operand")
- (vec_duplicate:SVE_ALL
- (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
- (clobber (scratch:<VPRED>))])]
- "TARGET_SVE"
- {
- if (MEM_P (operands[1]))
- {
- rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
- emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
- CONST0_RTX (<MODE>mode)));
- DONE;
- }
- }
-)
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Extract index
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
-;; Accept memory operands for the benefit of combine, and also in case
-;; the scalar input gets spilled to memory during RA. We want to split
-;; the load at the first opportunity in order to allow the PTRUE to be
-;; optimized with surrounding code.
-(define_insn_and_split "*vec_duplicate<mode>_reg"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
- (vec_duplicate:SVE_ALL
- (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
- (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
+;; Handle extractions from a predicate by converting to an integer vector
+;; and extracting from there.
+(define_expand "vec_extract<vpred><Vel>"
+ [(match_operand:<VEL> 0 "register_operand")
+ (match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")
+ ;; Dummy operand to which we can attach the iterator.
+ (reg:SVE_I V0_REGNUM)]
"TARGET_SVE"
- "@
- mov\t%0.<Vetype>, %<vwcore>1
- mov\t%0.<Vetype>, %<Vetype>1
- #"
- "&& MEM_P (operands[1])"
- [(const_int 0)]
{
- if (GET_CODE (operands[2]) == SCRATCH)
- operands[2] = gen_reg_rtx (<VPRED>mode);
- emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
- emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
- CONST0_RTX (<MODE>mode)));
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
+ CONST1_RTX (<MODE>mode),
+ CONST0_RTX (<MODE>mode)));
+ emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
DONE;
}
- [(set_attr "length" "4,4,8")]
)
-;; This is used for vec_duplicate<mode>s from memory, but can also
-;; be used by combine to optimize selects of a a vec_duplicate<mode>
-;; with zero.
-(define_insn "sve_ld1r<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (vec_duplicate:SVE_ALL
- (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
- (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
-)
+;; =========================================================================
+;; == Unary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] General unary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ABS
+;; - CNT (= popcount)
+;; - NEG
+;; - NOT
+;; -------------------------------------------------------------------------
-;; Load 128 bits from memory and duplicate to fill a vector. Since there
-;; are so few operations on 128-bit "elements", we don't define a VNx1TI
-;; and simply use vectors of bytes instead.
-(define_insn "*sve_ld1rq<Vesize>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
- UNSPEC_LD1RQ))]
- "TARGET_SVE"
- "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
-)
-
-;; Implement a predicate broadcast by shifting the low bit of the scalar
-;; input into the top bit and using a WHILELO. An alternative would be to
-;; duplicate the input and do a compare with zero.
-(define_expand "vec_duplicate<mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand")
- (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
+;; Unpredicated integer unary arithmetic.
+(define_expand "<optab><mode>2"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_dup 2)
+ (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- rtx tmp = gen_reg_rtx (DImode);
- rtx op1 = gen_lowpart (DImode, operands[1]);
- emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
- emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
- DONE;
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-(define_insn "vec_series<mode>"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
- (vec_series:SVE_I
- (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
- (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
+;; Integer unary arithmetic predicated with a PTRUE.
+(define_insn "*<optab><mode>2"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_INT_UNARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- index\t%0.<Vetype>, #%1, %<vw>2
- index\t%0.<Vetype>, %<vw>1, #%2
- index\t%0.<Vetype>, %<vw>1, %<vw>2"
+ "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
-;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
-;; of an INDEX instruction.
-(define_insn "*vec_series<mode>_plus"
- [(set (match_operand:SVE_I 0 "register_operand" "=w")
- (plus:SVE_I
- (vec_duplicate:SVE_I
- (match_operand:<VEL> 1 "register_operand" "r"))
- (match_operand:SVE_I 2 "immediate_operand")))]
- "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
- {
- operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
- return "index\t%0.<Vetype>, %<vw>1, #%2";
- }
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP] General unary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FABS
+;; - FNEG
+;; - FSQRT
+;; -------------------------------------------------------------------------
-;; Unpredicated LD[234].
-(define_expand "vec_load_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "register_operand")
- (unspec:SVE_STRUCT
+;; Unpredicated floating-point unary operations.
+(define_expand "<optab><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_dup 2)
- (match_operand:SVE_STRUCT 1 "memory_operand")]
- UNSPEC_LDN))]
+ (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated LD[234].
-(define_insn "vec_mask_load_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
- (unspec:SVE_STRUCT
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
- UNSPEC_LDN))]
+;; Predicated floating-point unary operations.
+(define_insn "*<optab><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "ld<vector_count><Vesize>\t%0, %2/z, %1"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
-;; Unpredicated ST[234]. This is always a full update, so the dependence
-;; on the old value of the memory location (via (match_dup 0)) is redundant.
-;; There doesn't seem to be any obvious benefit to treating the all-true
-;; case differently though. In particular, it's very unlikely that we'll
-;; only find out during RTL that a store_lanes is dead.
-(define_expand "vec_store_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "memory_operand")
- (unspec:SVE_STRUCT
+;; -------------------------------------------------------------------------
+;; ---- [FP] Rounding
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FRINTA
+;; - FRINTI
+;; - FRINTM
+;; - FRINTN
+;; - FRINTP
+;; - FRINTX
+;; - FRINTZ
+;; -------------------------------------------------------------------------
+
+;; Unpredicated FRINTy.
+(define_expand "<frint_pattern><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_dup 2)
- (match_operand:SVE_STRUCT 1 "register_operand")
- (match_dup 0)]
- UNSPEC_STN))]
+ (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
+ FRINT)]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated ST[234].
-(define_insn "vec_mask_store_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
- (unspec:SVE_STRUCT
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_STRUCT 1 "register_operand" "w")
- (match_dup 0)]
- UNSPEC_STN))]
+;; FRINTy predicated with a PTRUE.
+(define_insn "*<frint_pattern><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
+ FRINT)]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "st<vector_count><Vesize>\t%1, %2, %0"
+ "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
-(define_expand "vec_perm<mode>"
- [(match_operand:SVE_ALL 0 "register_operand")
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")
- (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
- "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
- {
- aarch64_expand_sve_vec_perm (operands[0], operands[1],
- operands[2], operands[3]);
- DONE;
- }
-)
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Inverse
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - NOT
+;; -------------------------------------------------------------------------
-(define_insn "*aarch64_sve_tbl<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:SVE_ALL 1 "register_operand" "w")
- (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
- UNSPEC_TBL))]
+;; Unpredicated predicate inverse.
+(define_expand "one_cmpl<mode>2"
+ [(set (match_operand:PRED_ALL 0 "register_operand")
+ (and:PRED_ALL
+ (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
+ (match_dup 2)))]
"TARGET_SVE"
- "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+ {
+ operands[2] = aarch64_ptrue_reg (<MODE>mode);
+ }
)
-(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
+;; Predicated predicate inverse.
+(define_insn "*one_cmpl<mode>3"
[(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
- (match_operand:PRED_ALL 2 "register_operand" "Upa")]
- PERMUTE))]
- "TARGET_SVE"
- "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
-)
-
-(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
- (match_operand:SVE_ALL 2 "register_operand" "w")]
- PERMUTE))]
+ (and:PRED_ALL
+ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
"TARGET_SVE"
- "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+ "not\t%0.b, %1/z, %2.b"
)
-(define_insn "*aarch64_sve_rev64<mode>"
- [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
- (unspec:SVE_BHS
- [(match_operand:VNx2BI 1 "register_operand" "Upl")
- (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
- UNSPEC_REV64)]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "rev<Vesize>\t%0.d, %1/m, %2.d"
-)
+;; =========================================================================
+;; == Binary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] General binary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - ADD
+;; - AND
+;; - EOR
+;; - MUL
+;; - ORR
+;; - SMAX
+;; - SMIN
+;; - SUB
+;; - UMAX
+;; - UMIN
+;; -------------------------------------------------------------------------
-(define_insn "*aarch64_sve_rev32<mode>"
- [(set (match_operand:SVE_BH 0 "register_operand" "=w")
- (unspec:SVE_BH
- [(match_operand:VNx4BI 1 "register_operand" "Upl")
- (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
- UNSPEC_REV32)]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer operations with merging.
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "register_operand"))
+ (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
"TARGET_SVE"
- "rev<Vesize>\t%0.s, %1/m, %2.s"
)
-(define_insn "*aarch64_sve_rev16vnx16qi"
- [(set (match_operand:VNx16QI 0 "register_operand" "=w")
- (unspec:VNx16QI
- [(match_operand:VNx8BI 1 "register_operand" "Upl")
- (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
- UNSPEC_REV16)]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer operations, merging with the first input.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w"))
+ (match_dup 2)]
+ UNSPEC_SEL))]
"TARGET_SVE"
- "revb\t%0.h, %1/m, %2.h"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
)
-(define_insn "@aarch64_sve_rev<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
- UNSPEC_REV))]
+;; Predicated integer operations, merging with the second input.
+(define_insn "*cond_<optab><mode>_3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "register_operand" "0, w"))
+ (match_dup 3)]
+ UNSPEC_SEL))]
"TARGET_SVE"
- "rev\t%0.<Vetype>, %1.<Vetype>")
-
-(define_insn "*aarch64_sve_dup_lane<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (vec_duplicate:SVE_ALL
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (parallel [(match_operand:SI 2 "const_int_operand")]))))]
- "TARGET_SVE
- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
- "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
+ "@
+ <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
)
-;; Note that the immediate (third) operand is the lane index not
-;; the byte index.
-(define_insn "*aarch64_sve_ext<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
- (match_operand:SVE_ALL 2 "register_operand" "w")
- (match_operand:SI 3 "const_int_operand")]
- UNSPEC_EXT))]
+;; Predicated integer operations, merging with an independent value.
+(define_insn_and_rewrite "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
+ (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
"TARGET_SVE
- && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
{
- operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
- return "ext\\t%0.b, %0.b, %2.b, #%3";
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
}
+ [(set_attr "movprfx" "yes")]
)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Addition
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADD
+;; - DECB
+;; - DECD
+;; - DECH
+;; - DECW
+;; - INCB
+;; - INCD
+;; - INCH
+;; - INCW
+;; - SUB
+;; -------------------------------------------------------------------------
+
(define_insn "add<mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
(plus:SVE_I
@@ -911,6 +1364,16 @@ (define_insn "add<mode>3"
add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
+;; Merging forms are handled through SVE_INT_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Subtraction
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SUB
+;; - SUBR
+;; -------------------------------------------------------------------------
+
(define_insn "sub<mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w")
(minus:SVE_I
@@ -922,6 +1385,57 @@ (define_insn "sub<mode>3"
subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
)
+;; Merging forms are handled through SVE_INT_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Absolute difference
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SABD
+;; - UABD
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer absolute difference.
+(define_expand "<su>abd<mode>_3"
+ [(use (match_operand:SVE_I 0 "register_operand"))
+ (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand"))]
+ "TARGET_SVE"
+ {
+ rtx pred = aarch64_ptrue_reg (<VPRED>mode);
+ emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
+ operands[2]));
+ DONE;
+ }
+)
+
+;; Predicated integer absolute difference.
+(define_insn "aarch64_<su>abd<mode>_3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (minus:SVE_I
+ (USMAX:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w"))
+ (<max_opp>:SVE_I
+ (match_dup 2)
+ (match_dup 3)))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MUL
+;; -------------------------------------------------------------------------
+
;; Unpredicated multiplication.
(define_expand "mul<mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
@@ -975,39 +1489,15 @@ (define_insn "*post_ra_mul<mode>3"
"mul\t%0.<Vetype>, %0.<Vetype>, #%2"
)
-(define_insn "*madd<mode>"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
- (plus:SVE_I
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
- (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
- UNSPEC_MERGE_PTRUE)
- (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
- "TARGET_SVE"
- "@
- mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
-)
+;; Merging forms are handled through SVE_INT_BINARY.
-(define_insn "*msub<mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
- (minus:SVE_I
- (match_operand:SVE_I 4 "register_operand" "w, 0, w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
- (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
- UNSPEC_MERGE_PTRUE)))]
- "TARGET_SVE"
- "@
- msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Highpart multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SMULH
+;; - UMULH
+;; -------------------------------------------------------------------------
;; Unpredicated highpart multiplication.
(define_expand "<su>mul<mode>3_highpart"
@@ -1040,7 +1530,17 @@ (define_insn "*<su>mul<mode>3_highpart"
[(set_attr "movprfx" "*,yes")]
)
-;; Unpredicated division.
+;; -------------------------------------------------------------------------
+;; ---- [INT] Division
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SDIV
+;; - SDIVR
+;; - UDIV
+;; - UDIVR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer division.
(define_expand "<optab><mode>3"
[(set (match_operand:SVE_SDI 0 "register_operand")
(unspec:SVE_SDI
@@ -1055,7 +1555,7 @@ (define_expand "<optab><mode>3"
}
)
-;; Division predicated with a PTRUE.
+;; Integer division predicated with a PTRUE.
(define_insn "*<optab><mode>3"
[(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
(unspec:SVE_SDI
@@ -1072,44 +1572,579 @@ (define_insn "*<optab><mode>3"
[(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated NEG, NOT and POPCOUNT.
-(define_expand "<optab><mode>2"
+;; Predicated integer division with merging.
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_SDI 0 "register_operand")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand")
+ (match_operand:SVE_SDI 3 "register_operand"))
+ (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+)
+
+;; Predicated integer division, merging with the first input.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "0, w")
+ (match_operand:SVE_SDI 3 "register_operand" "w, w"))
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer division, merging with the second input.
+(define_insn "*cond_<optab><mode>_3"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "w, w")
+ (match_operand:SVE_SDI 3 "register_operand" "0, w"))
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer division, merging with an independent value.
+(define_insn_and_rewrite "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
+ (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Binary logical operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AND
+;; - EOR
+;; - ORR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer binary logical operations.
+(define_insn "<optab><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
+ (LOGICAL:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "%0, w")
+ (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
+ "TARGET_SVE"
+ "@
+ <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
+ <logical>\t%0.d, %1.d, %2.d"
+)
+
+;; Merging forms are handled through SVE_INT_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Binary logical operations (inverted second input)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BIC
+;; -------------------------------------------------------------------------
+
+;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
+;; this pattern even though the NOT instruction itself is predicated.
+(define_insn "bic<mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (and:SVE_I
+ (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
+ (match_operand:SVE_I 2 "register_operand" "w")))]
+ "TARGET_SVE"
+ "bic\t%0.d, %2.d, %1.d"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Shifts
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ASR
+;; - LSL
+;; - LSR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated shift by a scalar, which expands into one of the vector
+;; shifts below.
+(define_expand "<ASHIFT:optab><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
+ (match_operand:<VEL> 2 "general_operand")))]
+ "TARGET_SVE"
+ {
+ rtx amount;
+ if (CONST_INT_P (operands[2]))
+ {
+ amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
+ if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
+ amount = force_reg (<MODE>mode, amount);
+ }
+ else
+ {
+ amount = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_vec_duplicate<mode> (amount,
+ convert_to_mode (<VEL>mode,
+ operands[2], 0)));
+ }
+ emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
+ DONE;
+ }
+)
+
+;; Unpredicated shift by a vector.
+(define_expand "v<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
- [(match_dup 2)
- (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
+ [(match_dup 3)
+ (ASHIFT:SVE_I
+ (match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; NEG, NOT and POPCOUNT predicated with a PTRUE.
-(define_insn "*<optab><mode>2"
+;; Shift by a vector, predicated with a PTRUE. We don't actually need
+;; the predicate for the first alternative, but using Upa or X isn't
+;; likely to gain much and would make the instruction seem less uniform
+;; to the register allocator.
+(define_insn_and_split "*v<optab><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (ASHIFT:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w, 0, w")
+ (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ #
+ <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ "&& reload_completed
+ && !register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
+ ""
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; Unpredicated shift operations by a constant (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+(define_insn "*post_ra_v<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (ASHIFT:SVE_I
+ (match_operand:SVE_I 1 "register_operand" "w")
+ (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
+ "TARGET_SVE && reload_completed"
+ "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Maximum and minimum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SMAX
+;; - SMIN
+;; - UMAX
+;; - UMIN
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer MAX/MIN.
+(define_expand "<su><maxmin><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
+ [(match_dup 3)
+ (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; Integer MAX/MIN predicated with a PTRUE.
+(define_insn "*<su><maxmin><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Merging forms are handled through SVE_INT_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] General binary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes post-RA forms of:
+;; - FADD
+;; - FMUL
+;; - FSUB
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point binary operations (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+(define_insn "*post_ra_<sve_fp_op><mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (SVE_UNPRED_FP_BINARY:SVE_F
+ (match_operand:SVE_F 1 "register_operand" "w")
+ (match_operand:SVE_F 2 "register_operand" "w")))]
+ "TARGET_SVE && reload_completed"
+ "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] General binary arithmetic corresponding to unspecs
+;; -------------------------------------------------------------------------
+;; Includes merging forms of:
+;; - FADD
+;; - FDIV
+;; - FDIVR
+;; - FMAXNM
+;; - FMINNM
+;; - FMUL
+;; - FSUB
+;; - FSUBR
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point operations with merging.
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")]
+ SVE_COND_FP_BINARY)
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+)
+
+;; Predicated floating-point operations, merging with the first input.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "0, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w")]
+ SVE_COND_FP_BINARY)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated floating-point operations, merging with the second input.
+(define_insn "*cond_<optab><mode>_3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "register_operand" "0, w")]
+ SVE_COND_FP_BINARY)
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated floating-point operations, merging with an independent value.
+(define_insn_and_rewrite "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
+ SVE_COND_FP_BINARY)
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Addition
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADD
+;; - FSUB
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point addition.
+(define_expand "add<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (plus:SVE_F
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; Floating-point addition predicated with a PTRUE.
+(define_insn_and_split "*add<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (plus:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
+)
+
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Subtraction
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADD
+;; - FSUB
+;; - FSUBR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point subtraction.
+(define_expand "sub<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (minus:SVE_F
+ (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
+ (match_operand:SVE_F 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; Floating-point subtraction predicated with a PTRUE.
+(define_insn_and_split "*sub<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+ (minus:SVE_F
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE
+ && (register_operand (operands[2], <MODE>mode)
+ || register_operand (operands[3], <MODE>mode))"
+ "@
+ fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
+ fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[2], <MODE>mode)
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
+)
+
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Absolute difference
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FABD
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point absolute difference: FABD matches abs (%2 - %3).
+(define_insn "*fabd<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl")
- (SVE_INT_UNARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "w"))]
+ (abs:SVE_F
+ (minus:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "0")
+ (match_operand:SVE_F 3 "register_operand" "w")))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMUL
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point multiplication.
+(define_expand "mul<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (mult:SVE_F
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Vector AND, ORR and XOR.
-(define_insn "<optab><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
- (LOGICAL:SVE_I
- (match_operand:SVE_I 1 "register_operand" "%0, w")
- (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
+;; Floating-point multiplication predicated with a PTRUE.
+(define_insn_and_split "*mul<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (mult:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "%0, w")
+ (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
- <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
- <logical>\t%0.d, %1.d, %2.d"
+ fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
+)
+
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Division
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FDIV
+;; - FDIVR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point division.
+(define_expand "div<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (div:SVE_F (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs
+;; Floating-point division predicated with a PTRUE.
+(define_insn "*div<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Binary logical operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AND
+;; - EOR
+;; - ORR
+;; -------------------------------------------------------------------------
+
+;; Binary logical operations on floating-point modes. We avoid subregs
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
@@ -1121,17 +2156,150 @@ (define_insn "*<optab><mode>3"
"<logicalf_op>\t%0.d, %1.d, %2.d"
)
-;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
-;; this pattern even though the NOT instruction itself is predicated.
-(define_insn "bic<mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w")
- (and:SVE_I
- (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
- (match_operand:SVE_I 2 "register_operand" "w")))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Sign copying
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+(define_expand "copysign<mode>3"
+ [(match_operand:SVE_F 0 "register_operand")
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
"TARGET_SVE"
- "bic\t%0.d, %2.d, %1.d"
+ {
+ rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
+ int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+ rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+ rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
+
+ emit_insn (gen_and<v_int_equiv>3
+ (sign, arg2,
+ aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ HOST_WIDE_INT_M1U
+ << bits)));
+ emit_insn (gen_and<v_int_equiv>3
+ (mant, arg1,
+ aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ ~(HOST_WIDE_INT_M1U
+ << bits))));
+ emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+ DONE;
+ }
+)
+
+(define_expand "xorsign<mode>3"
+ [(match_operand:SVE_F 0 "register_operand")
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
+ "TARGET_SVE"
+ {
+ rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
+ int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+ rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+ rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
+
+ emit_insn (gen_and<v_int_equiv>3
+ (sign, arg2,
+ aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ HOST_WIDE_INT_M1U
+ << bits)));
+ emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+ DONE;
+ }
)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Maximum and minimum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMAX
+;; - FMAXNM
+;; - FMIN
+;; - FMINNM
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point MAX/MIN.
+(define_expand "<su><maxmin><mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; Floating-point MAX/MIN predicated with a PTRUE.
+(define_insn "*<su><maxmin><mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Unpredicated fmax/fmin.
+(define_expand "<maxmin_uns><mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
+ FMAXMIN_UNS)]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; fmax/fmin predicated with a PTRUE.
+(define_insn "*<maxmin_uns><mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w")]
+ FMAXMIN_UNS)]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Binary logical operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AND
+;; - ANDS
+;; - EOR
+;; - EORS
+;; - ORR
+;; - ORRS
+;; -------------------------------------------------------------------------
+
;; Predicate AND. We can reuse one of the inputs as the GP.
(define_insn "and<mode>3"
[(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
@@ -1141,7 +2309,7 @@ (define_insn "and<mode>3"
"and\t%0.b, %1/z, %1.b, %2.b"
)
-;; Unpredicated predicate ORR and XOR.
+;; Unpredicated predicate EOR and ORR.
(define_expand "<optab><mode>3"
[(set (match_operand:PRED_ALL 0 "register_operand")
(and:PRED_ALL
@@ -1155,7 +2323,7 @@ (define_expand "<optab><mode>3"
}
)
-;; Predicated predicate ORR and XOR.
+;; Predicated predicate AND, EOR and ORR.
(define_insn "pred_<optab><mode>3"
[(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
(and:PRED_ALL
@@ -1189,27 +2357,13 @@ (define_insn "*<optab><mode>3_cc"
"<logical>s\t%0.b, %1/z, %2.b, %3.b"
)
-;; Unpredicated predicate inverse.
-(define_expand "one_cmpl<mode>2"
- [(set (match_operand:PRED_ALL 0 "register_operand")
- (and:PRED_ALL
- (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
- (match_dup 2)))]
- "TARGET_SVE"
- {
- operands[2] = aarch64_ptrue_reg (<MODE>mode);
- }
-)
-
-;; Predicated predicate inverse.
-(define_insn "*one_cmpl<mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL
- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
- (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
- "TARGET_SVE"
- "not\t%0.b, %1/z, %2.b"
-)
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Binary logical operations (inverted second input)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BIC
+;; - ORN
+;; -------------------------------------------------------------------------
;; Predicated predicate BIC and ORN.
(define_insn "*<nlogical><mode>3"
@@ -1223,6 +2377,14 @@ (define_insn "*<nlogical><mode>3"
"<nlogical>\t%0.b, %1/z, %3.b, %2.b"
)
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Binary logical operations (inverted result)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - NAND
+;; - NOR
+;; -------------------------------------------------------------------------
+
;; Predicated predicate NAND and NOR.
(define_insn "*<logical_nn><mode>3"
[(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
@@ -1235,135 +2397,534 @@ (define_insn "*<logical_nn><mode>3"
"<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
)
-;; Unpredicated LSL, LSR and ASR by a vector.
-(define_expand "v<optab><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_dup 3)
- (ASHIFT:SVE_I
- (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
+;; =========================================================================
+;; == Ternary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] MLA and MAD
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MAD
+;; - MLA
+;; -------------------------------------------------------------------------
+
+;; Predicated integer addition of product.
+(define_insn "*madd<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
+ (plus:SVE_I
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
+ UNSPEC_MERGE_PTRUE)
+ (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
+ "TARGET_SVE"
+ "@
+ mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] MLS and MSB
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MLS
+;; - MSB
+;; -------------------------------------------------------------------------
+
+;; Predicated integer subtraction of product.
+(define_insn "*msub<mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
+ (minus:SVE_I
+ (match_operand:SVE_I 4 "register_operand" "w, 0, w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
+ UNSPEC_MERGE_PTRUE)))]
+ "TARGET_SVE"
+ "@
+ msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Dot product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SDOT
+;; - UDOT
+;; -------------------------------------------------------------------------
+
+;; Four-element integer dot-product with accumulation.
+(define_insn "<sur>dot_prod<vsi2qi>"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
+ (plus:SVE_SDI
+ (unspec:SVE_SDI
+ [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
+ (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
+ DOTPROD)
+ (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
+ "TARGET_SVE"
+ "@
+ <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
+ movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Sum of absolute differences
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
+;; operands 1 and 2. The sequence also has to perform a widening reduction of
+;; the difference into a vector and accumulate that into operand 3 before
+;; copying that into the result operand 0.
+;; Perform that with a sequence of:
+;; MOV ones.b, #1
+;; [SU]ABD diff.b, p0/m, op1.b, op2.b
+;; MOVPRFX op0, op3 // If necessary
+;; UDOT op0.s, diff.b, ones.b
+(define_expand "<sur>sad<vsi2qi>"
+ [(use (match_operand:SVE_SDI 0 "register_operand"))
+ (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
+ (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
+ (use (match_operand:SVE_SDI 3 "register_operand"))]
+ "TARGET_SVE"
+ {
+ rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
+ rtx diff = gen_reg_rtx (<VSI2QI>mode);
+ emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
+ emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
+ DONE;
+ }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] General ternary arithmetic corresponding to unspecs
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - FMAD
+;; - FMLA
+;; - FMLS
+;; - FMSB
+;; - FNMAD
+;; - FNMLA
+;; - FNMLS
+;; - FNMSB
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point ternary operations with merging.
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
+ SVE_COND_FP_TERNARY)
+ (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+{
+ /* Swap the multiplication operands if the fallback value is the
+ second of the two. */
+ if (rtx_equal_p (operands[3], operands[5]))
+ std::swap (operands[2], operands[3]);
+})
+
+;; Predicated floating-point ternary operations, merging with the
+;; first input.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "0, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w")]
+ SVE_COND_FP_TERNARY)
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated floating-point ternary operations, merging with the
+;; third input.
+(define_insn "*cond_<optab><mode>_4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w")
+ (match_operand:SVE_F 4 "register_operand" "0, w")]
+ SVE_COND_FP_TERNARY)
+ (match_dup 4)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated floating-point ternary operations, merging with an
+;; independent value.
+(define_insn_and_rewrite "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w, w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")]
+ SVE_COND_FP_TERNARY)
+ (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && !rtx_equal_p (operands[2], operands[5])
+ && !rtx_equal_p (operands[3], operands[5])
+ && !rtx_equal_p (operands[4], operands[5])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && !CONSTANT_P (operands[5])
+ && !rtx_equal_p (operands[0], operands[5])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
+ operands[5], operands[1]));
+ operands[5] = operands[4] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] FMLA and FMAD
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMAD
+;; - FMLA
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fma (%0 = (%1 * %2) + %3).
+(define_expand "fma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't
-;; actually need the predicate for the first alternative, but using Upa
-;; or X isn't likely to gain much and would make the instruction seem
-;; less uniform to the register allocator.
-(define_insn_and_split "*v<optab><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_I
+;; fma predicated with a PTRUE.
+(define_insn "*fma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (ASHIFT:SVE_I
- (match_operand:SVE_I 2 "register_operand" "w, 0, w")
- (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
+ (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
- #
- <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- "&& reload_completed
- && !register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
- ""
+ fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
[(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated shift operations by a constant (post-RA only).
-;; These are generated by splitting a predicated instruction whose
-;; predicate is unused.
-(define_insn "*post_ra_v<optab><mode>3"
+;; -------------------------------------------------------------------------
+;; ---- [FP] FMLS and FMSB
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMLS
+;; - FMSB
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
+(define_expand "fnma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 1 "register_operand"))
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; fnma predicated with a PTRUE.
+(define_insn "*fnma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] FNMLA and FNMAD
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FNMAD
+;; - FNMLA
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
+;; As for fnma above, operand 4 is an all-true predicate created at expand
+;; time so that the predicated insn pattern below can match.
+(define_expand "fnms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 1 "register_operand"))
+ (match_operand:SVE_F 2 "register_operand")
+ (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand")))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; fnms predicated with a PTRUE.
+;; Alternative 0 ties the multiplicand (operand 3) to the destination and
+;; uses FNMAD; alternative 1 ties operand 2 and uses FNMLA; alternative 2
+;; copies operand 2 into the destination with MOVPRFX first.
+(define_insn "*fnms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (neg:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] FNMLS and FNMSB
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FNMLS
+;; - FNMSB
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fms (%0 = (%1 * %2) - %3).
+;; As for fnma above, operand 4 is an all-true predicate created at expand
+;; time so that the predicated insn pattern below can match.
+(define_expand "fms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")
+ (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand")))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; fms predicated with a PTRUE.
+;; Alternative 0 ties the multiplicand (operand 3) to the destination and
+;; uses FNMSB; alternative 1 ties operand 2 and uses FNMLS; alternative 2
+;; copies operand 2 into the destination with MOVPRFX first.
+(define_insn "*fms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (neg:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; =========================================================================
+;; == Comparisons and selects
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Select based on predicates
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - MOV
+;; - SEL
+;; -------------------------------------------------------------------------
+
+;; vcond_mask operand order: true, false, mask
+;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
+;; SEL operand order: mask, true, false
+;; i.e. the optab's predicate comes last but is the first operand of both
+;; the unspec and the instruction.
+(define_insn "vcond_mask_<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 3 "register_operand" "Upa")
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_operand:SVE_ALL 2 "register_operand" "w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; Selects between a duplicated immediate and zero.
+;; The zeroing (/z) form of MOV writes #%2 to active lanes, which matches
+;; the requirement that operand 3 be an immediate-zero vector.
+(define_insn "aarch64_sve_dup<mode>_const"
 [(set (match_operand:SVE_I 0 "register_operand" "=w")
- (ASHIFT:SVE_I
- (match_operand:SVE_I 1 "register_operand" "w")
- (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
- "TARGET_SVE && reload_completed"
- "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
+ (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "mov\t%0.<Vetype>, %1/z, #%2"
)
-;; LSL, LSR and ASR by a scalar, which expands into one of the vector
-;; shifts above.
-(define_expand "<ASHIFT:optab><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand")
- (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
- (match_operand:<VEL> 2 "general_operand")))]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Compare and select
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Integer (signed) vcond. Don't enforce an immediate range here, since it
+;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
+;; Operand 3 is the comparison and operands 4 and 5 are its arguments;
+;; the whole expansion is delegated to aarch64_expand_sve_vcond.
+(define_expand "vcond<mode><v_int_equiv>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (if_then_else:SVE_ALL
+ (match_operator 3 "comparison_operator"
+ [(match_operand:<V_INT_EQUIV> 4 "register_operand")
+ (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_operand:SVE_ALL 2 "register_operand")))]
 "TARGET_SVE"
 {
- rtx amount;
- if (CONST_INT_P (operands[2]))
- {
- amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
- if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
- amount = force_reg (<MODE>mode, amount);
- }
- else
- {
- amount = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_vec_duplicate<mode> (amount,
- convert_to_mode (<VEL>mode,
- operands[2], 0)));
- }
- emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
+ aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
 DONE;
 }
)
-;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE.
-;;
-;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
-;; is a PTRUE even if the optimizers haven't yet been able to propagate
-;; the constant. We would use a separate unspec code for PTESTs involving
-;; GPs that might not be PTRUEs.
-(define_insn "ptest_ptrue<mode>"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
- [(match_operand:PRED_ALL 0 "register_operand" "Upa")
- (match_operand:PRED_ALL 1 "register_operand" "Upa")]
- UNSPEC_PTEST_PTRUE))]
+;; Integer vcondu. Don't enforce an immediate range here, since it
+;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
+;; Identical in shape to the signed vcond expander; the expansion is again
+;; delegated to aarch64_expand_sve_vcond.
+(define_expand "vcondu<mode><v_int_equiv>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (if_then_else:SVE_ALL
+ (match_operator 3 "comparison_operator"
+ [(match_operand:<V_INT_EQUIV> 4 "register_operand")
+ (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_operand:SVE_ALL 2 "register_operand")))]
 "TARGET_SVE"
- "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
+ {
+ aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
+ DONE;
+ }
)
-;; Set element I of the result if operand1 + J < operand2 for all J in [0, I].
-;; with the comparison being unsigned.
-(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
- (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
- UNSPEC_WHILE_LO))
- (clobber (reg:CC_NZC CC_REGNUM))]
+;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
+;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
+;; Restricted to the SVE_SD modes; operand 5 may be a register or zero.
+(define_expand "vcond<mode><v_fp_equiv>"
+ [(set (match_operand:SVE_SD 0 "register_operand")
+ (if_then_else:SVE_SD
+ (match_operator 3 "comparison_operator"
+ [(match_operand:<V_FP_EQUIV> 4 "register_operand")
+ (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
+ (match_operand:SVE_SD 1 "register_operand")
+ (match_operand:SVE_SD 2 "register_operand")))]
 "TARGET_SVE"
- "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
+ {
+ aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
+ DONE;
+ }
)
-;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
-;; Handle the case in which both results are useful. The GP operand
-;; to the PTEST isn't needed, so we allow it to be anything.
-(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
- [(match_operand:PRED_ALL 1)
- (unspec:PRED_ALL
- [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
- (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
- UNSPEC_WHILE_LO)]
- UNSPEC_PTEST_PTRUE))
- (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_dup 2)
- (match_dup 3)]
- UNSPEC_WHILE_LO))]
+;; -------------------------------------------------------------------------
+;; ---- [INT] Comparisons
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - CMPEQ
+;; - CMPGE
+;; - CMPGT
+;; - CMPHI
+;; - CMPHS
+;; - CMPLE
+;; - CMPLO
+;; - CMPLS
+;; - CMPLT
+;; - CMPNE
+;; -------------------------------------------------------------------------
+
+;; Signed integer comparisons. Don't enforce an immediate range here, since
+;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
+;; instead.
+;; The parallel also clobbers the condition flags (CC_NZC).
+(define_expand "vec_cmp<mode><vpred>"
+ [(parallel
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (match_operator:<VPRED> 1 "comparison_operator"
+ [(match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "nonmemory_operand")]))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
 "TARGET_SVE"
- "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
- ;; Force the compiler to drop the unused predicate operand, so that we
- ;; don't have an unnecessary PTRUE.
- "&& !CONSTANT_P (operands[1])"
 {
- operands[1] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
+ aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
+ }
)
+
+;; Unsigned integer comparisons. Don't enforce an immediate range here, since
+;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
+;; instead.
+;; As for the signed form, the parallel clobbers the condition flags.
+(define_expand "vec_cmpu<mode><vpred>"
+ [(parallel
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (match_operator:<VPRED> 1 "comparison_operator"
+ [(match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "nonmemory_operand")]))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
 }
)
@@ -1383,9 +2944,9 @@ (define_insn "*cmp<cmp_op><mode>"
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Integer comparisons predicated with a PTRUE in which only the flags result
-;; is interesting.
-(define_insn "*cmp<cmp_op><mode>_ptest"
+;; Integer comparisons predicated with a PTRUE in which both the flag and
+;; predicate results are interesting.
+(define_insn "*cmp<cmp_op><mode>_cc"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
@@ -1396,16 +2957,22 @@ (define_insn "*cmp<cmp_op><mode>_ptest"
(match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
UNSPEC_MERGE_PTRUE)]
UNSPEC_PTEST_PTRUE))
- (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
+ (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Integer comparisons predicated with a PTRUE in which both the flag and
-;; predicate results are interesting.
-(define_insn "*cmp<cmp_op><mode>_cc"
+;; Integer comparisons predicated with a PTRUE in which only the flags result
+;; is interesting.
+(define_insn "*cmp<cmp_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
@@ -1416,13 +2983,7 @@ (define_insn "*cmp<cmp_op><mode>_cc"
(match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
UNSPEC_MERGE_PTRUE)]
UNSPEC_PTEST_PTRUE))
- (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (unspec:<VPRED>
- [(match_dup 1)
- (SVE_INT_CMP:<VPRED>
- (match_dup 2)
- (match_dup 3))]
- UNSPEC_MERGE_PTRUE))]
+ (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
"TARGET_SVE"
"@
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
@@ -1472,6 +3033,80 @@ (define_insn "*pred_cmp<cmp_op><mode>"
cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
+;; -------------------------------------------------------------------------
+;; ---- [INT] While tests
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - WHILELO
+;; -------------------------------------------------------------------------
+
+;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
+;; with the comparison being unsigned.
+;; WHILELO also sets the flags, hence the CC_NZC clobber; the pattern below
+;; handles the case where that flags result is also wanted.
+(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
+ UNSPEC_WHILE_LO))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
+)
+
+;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
+;; Handle the case in which both results are useful. The GP operand
+;; to the PTEST isn't needed, so we allow it to be anything.
+;; (Operand 1 is therefore a bare match_operand with no predicate or
+;; constraint; the rewrite below canonicalizes it to an all-true constant.)
+(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:PRED_ALL 1)
+ (unspec:PRED_ALL
+ [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
+ UNSPEC_WHILE_LO)]
+ UNSPEC_PTEST_PTRUE))
+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (unspec:PRED_ALL [(match_dup 2)
+ (match_dup 3)]
+ UNSPEC_WHILE_LO))]
+ "TARGET_SVE"
+ "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
+ ;; Force the compiler to drop the unused predicate operand, so that we
+ ;; don't have an unnecessary PTRUE.
+ "&& !CONSTANT_P (operands[1])"
+ {
+ operands[1] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
+ }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Comparisons
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCMEQ
+;; - FCMGE
+;; - FCMGT
+;; - FCMLE
+;; - FCMLT
+;; - FCMNE
+;; - FCMUO
+;; -------------------------------------------------------------------------
+
+;; Floating-point comparisons. All comparisons except FCMUO allow a zero
+;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
+;; with zero.
+;; NOTE(review): the final "false" argument presumably selects the
+;; non-inverted form of the comparison -- confirm against the definition
+;; of aarch64_expand_sve_vec_cmp_float.
+(define_expand "vec_cmp<mode><vpred>"
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (match_operator:<VPRED> 1 "comparison_operator"
+ [(match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
+ "TARGET_SVE"
+ {
+ aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
+ }
+)
+
;; Floating-point comparisons predicated with a PTRUE.
(define_insn "*fcm<cmp_op><mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
@@ -1487,6 +3122,7 @@ (define_insn "*fcm<cmp_op><mode>"
fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
+;; Same for unordered comparisons.
(define_insn "*fcmuo<mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(unspec:<VPRED>
@@ -1528,6 +3164,7 @@ (define_insn_and_split "*fcm<cmp_op><mod
(match_dup 4)))]
)
+;; Same for unordered comparisons.
(define_insn_and_split "*fcmuo<mode>_and_combine"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(and:<VPRED>
@@ -1549,9 +3186,8 @@ (define_insn_and_split "*fcmuo<mode>_and
(match_dup 4)))]
)
-;; Unpredicated floating-point comparisons, with the results ANDed
-;; with another predicate. This is a valid fold for the same reasons
-;; as above.
+;; Unpredicated floating-point comparisons, with the results ANDed with
+;; another predicate. This is a valid fold for the same reasons as above.
(define_insn "*fcm<cmp_op><mode>_and"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
(and:<VPRED>
@@ -1565,6 +3201,7 @@ (define_insn "*fcm<cmp_op><mode>_and"
fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
+;; Same for unordered comparisons.
(define_insn "*fcmuo<mode>_and"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(and:<VPRED>
@@ -1591,135 +3228,12 @@ (define_insn "*pred_fcm<cmp_op><mode>"
fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; vcond_mask operand order: true, false, mask
-;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
-;; SEL operand order: mask, true, false
-(define_insn "vcond_mask_<mode><vpred>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:<VPRED> 3 "register_operand" "Upa")
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (match_operand:SVE_ALL 2 "register_operand" "w")]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
-)
-
-;; Selects between a duplicated immediate and zero.
-(define_insn "aarch64_sve_dup<mode>_const"
- [(set (match_operand:SVE_I 0 "register_operand" "=w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
- (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "mov\t%0.<Vetype>, %1/z, #%2"
-)
-
-;; Integer (signed) vcond. Don't enforce an immediate range here, since it
-;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
-(define_expand "vcond<mode><v_int_equiv>"
- [(set (match_operand:SVE_ALL 0 "register_operand")
- (if_then_else:SVE_ALL
- (match_operator 3 "comparison_operator"
- [(match_operand:<V_INT_EQUIV> 4 "register_operand")
- (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")))]
- "TARGET_SVE"
- {
- aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
- DONE;
- }
-)
-
-;; Integer vcondu. Don't enforce an immediate range here, since it
-;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
-(define_expand "vcondu<mode><v_int_equiv>"
- [(set (match_operand:SVE_ALL 0 "register_operand")
- (if_then_else:SVE_ALL
- (match_operator 3 "comparison_operator"
- [(match_operand:<V_INT_EQUIV> 4 "register_operand")
- (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")))]
- "TARGET_SVE"
- {
- aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
- DONE;
- }
-)
-
-;; Floating-point vcond. All comparisons except FCMUO allow a zero
-;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
-;; with zero.
-(define_expand "vcond<mode><v_fp_equiv>"
- [(set (match_operand:SVE_SD 0 "register_operand")
- (if_then_else:SVE_SD
- (match_operator 3 "comparison_operator"
- [(match_operand:<V_FP_EQUIV> 4 "register_operand")
- (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
- (match_operand:SVE_SD 1 "register_operand")
- (match_operand:SVE_SD 2 "register_operand")))]
- "TARGET_SVE"
- {
- aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
- DONE;
- }
-)
-
-;; Signed integer comparisons. Don't enforce an immediate range here, since
-;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
-;; instead.
-(define_expand "vec_cmp<mode><vpred>"
- [(parallel
- [(set (match_operand:<VPRED> 0 "register_operand")
- (match_operator:<VPRED> 1 "comparison_operator"
- [(match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "nonmemory_operand")]))
- (clobber (reg:CC_NZC CC_REGNUM))])]
- "TARGET_SVE"
- {
- aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3]);
- DONE;
- }
-)
-
-;; Unsigned integer comparisons. Don't enforce an immediate range here, since
-;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
-;; instead.
-(define_expand "vec_cmpu<mode><vpred>"
- [(parallel
- [(set (match_operand:<VPRED> 0 "register_operand")
- (match_operator:<VPRED> 1 "comparison_operator"
- [(match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "nonmemory_operand")]))
- (clobber (reg:CC_NZC CC_REGNUM))])]
- "TARGET_SVE"
- {
- aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3]);
- DONE;
- }
-)
-
-;; Floating-point comparisons. All comparisons except FCMUO allow a zero
-;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
-;; with zero.
-(define_expand "vec_cmp<mode><vpred>"
- [(set (match_operand:<VPRED> 0 "register_operand")
- (match_operator:<VPRED> 1 "comparison_operator"
- [(match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
- "TARGET_SVE"
- {
- aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3], false);
- DONE;
- }
-)
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Test bits
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PTEST
+;; -------------------------------------------------------------------------
;; Branch based on predicate equality or inequality.
(define_expand "cbranch<mode>4"
@@ -1748,245 +3262,32 @@ (define_expand "cbranch<mode>4"
}
)
-;; Unpredicated integer MIN/MAX.
-(define_expand "<su><maxmin><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_dup 3)
- (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
-
-;; Integer MIN/MAX predicated with a PTRUE.
-(define_insn "*<su><maxmin><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
- (match_operand:SVE_I 3 "register_operand" "w, w"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "@
- <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Unpredicated floating-point MIN/MAX.
-(define_expand "<su><maxmin><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
-
-;; Floating-point MIN/MAX predicated with a PTRUE.
-(define_insn "*<su><maxmin><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w")
- (match_operand:SVE_F 3 "register_operand" "w, w"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "@
- f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Unpredicated fmin/fmax.
-(define_expand "<maxmin_uns><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")]
- FMAXMIN_UNS)]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
-
-;; fmin/fmax predicated with a PTRUE.
-(define_insn "*<maxmin_uns><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w")
- (match_operand:SVE_F 3 "register_operand" "w, w")]
- FMAXMIN_UNS)]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "@
- <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated integer operations with select.
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "register_operand"))
- (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
-)
-
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_SDI 0 "register_operand")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand")
- (match_operand:SVE_SDI 3 "register_operand"))
- (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
-)
-
-;; Predicated integer operations with select matching the first operand.
-(define_insn "*cond_<optab><mode>_2"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "0, w")
- (match_operand:SVE_I 3 "register_operand" "w, w"))
- (match_dup 2)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-(define_insn "*cond_<optab><mode>_2"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand" "0, w")
- (match_operand:SVE_SDI 3 "register_operand" "w, w"))
- (match_dup 2)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated integer operations with select matching the second operand.
-(define_insn "*cond_<optab><mode>_3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "register_operand" "0, w"))
- (match_dup 3)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-(define_insn "*cond_<optab><mode>_3"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand" "w, w")
- (match_operand:SVE_SDI 3 "register_operand" "0, w"))
- (match_dup 3)]
- UNSPEC_SEL))]
+;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE.
+;;
+;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
+;; is a PTRUE even if the optimizers haven't yet been able to propagate
+;; the constant. We would use a separate unspec code for PTESTs involving
+;; GPs that might not be PTRUEs.
+(define_insn "ptest_ptrue<mode>"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:PRED_ALL 0 "register_operand" "Upa")
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")]
+ UNSPEC_PTEST_PTRUE))]
"TARGET_SVE"
- "@
- <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated integer binary operations in which the values of inactive
-;; lanes are distinct from the other inputs.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
- (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
- (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
- UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])"
- "@
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- #"
- "&& reload_completed
- && register_operand (operands[4], <MODE>mode)
- && !rtx_equal_p (operands[0], operands[4])"
- {
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
- operands[4], operands[1]));
- operands[4] = operands[2] = operands[0];
- }
- [(set_attr "movprfx" "yes")]
+ "ptest\t%0, %1.b"
)
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
- (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
- (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
- UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])"
- "@
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- #"
- "&& reload_completed
- && register_operand (operands[4], <MODE>mode)
- && !rtx_equal_p (operands[0], operands[4])"
- {
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
- operands[4], operands[1]));
- operands[4] = operands[2] = operands[0];
- }
- [(set_attr "movprfx" "yes")]
-)
+;; =========================================================================
+;; == Reductions
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Conditional reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - CLASTB
+;; -------------------------------------------------------------------------
;; Set operand 0 to the last active element in operand 3, or to tied
;; operand 1 if no elements are active.
@@ -2003,6 +3304,20 @@ (define_insn "fold_extract_last_<mode>"
clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Tree reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ANDV
+;; - EORV
+;; - ORV
+;; - SMAXV
+;; - SMINV
+;; - UADDV
+;; - UMAXV
+;; - UMINV
+;; -------------------------------------------------------------------------
+
;; Unpredicated integer add reduction.
(define_expand "reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
@@ -2025,92 +3340,110 @@ (define_insn "*reduc_plus_scal_<mode>"
"uaddv\t%d0, %1, %2.<Vetype>"
)
-;; Unpredicated floating-point add reduction.
-(define_expand "reduc_plus_scal_<mode>"
+;; Unpredicated integer MAX/MIN reduction.
+(define_expand "reduc_<maxmin_uns>_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
(unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_F 1 "register_operand")]
- UNSPEC_FADDV))]
+ (match_operand:SVE_I 1 "register_operand")]
+ MAXMINV))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated floating-point add reduction.
-(define_insn "*reduc_plus_scal_<mode>"
+;; Predicated integer MAX/MIN reduction.
+(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_F 2 "register_operand" "w")]
- UNSPEC_FADDV))]
+ (match_operand:SVE_I 2 "register_operand" "w")]
+ MAXMINV))]
"TARGET_SVE"
- "faddv\t%<Vetype>0, %1, %2.<Vetype>"
+ "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)
-;; Unpredicated integer MIN/MAX reduction.
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+(define_expand "reduc_<optab>_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
(unspec:<VEL> [(match_dup 2)
(match_operand:SVE_I 1 "register_operand")]
- MAXMINV))]
+ BITWISEV))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated integer MIN/MAX reduction.
-(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
+(define_insn "*reduc_<optab>_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
(match_operand:SVE_I 2 "register_operand" "w")]
- MAXMINV))]
+ BITWISEV))]
"TARGET_SVE"
- "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
+ "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
)
-;; Unpredicated floating-point MIN/MAX reduction.
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+;; -------------------------------------------------------------------------
+;; ---- [FP] Tree reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADDV
+;; - FMAXNMV
+;; - FMAXV
+;; - FMINNMV
+;; - FMINV
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point add reduction.
+(define_expand "reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
(unspec:<VEL> [(match_dup 2)
(match_operand:SVE_F 1 "register_operand")]
- FMAXMINV))]
+ UNSPEC_FADDV))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated floating-point MIN/MAX reduction.
-(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
+;; Predicated floating-point add reduction.
+(define_insn "*reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
(match_operand:SVE_F 2 "register_operand" "w")]
- FMAXMINV))]
+ UNSPEC_FADDV))]
"TARGET_SVE"
- "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
+ "faddv\t%<Vetype>0, %1, %2.<Vetype>"
)
-(define_expand "reduc_<optab>_scal_<mode>"
+;; Unpredicated floating-point MAX/MIN reduction.
+(define_expand "reduc_<maxmin_uns>_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
(unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_I 1 "register_operand")]
- BITWISEV))]
+ (match_operand:SVE_F 1 "register_operand")]
+ FMAXMINV))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-(define_insn "*reduc_<optab>_scal_<mode>"
+;; Predicated floating-point MAX/MIN reduction.
+(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "register_operand" "w")]
- BITWISEV))]
+ (match_operand:SVE_F 2 "register_operand" "w")]
+ FMAXMINV))]
"TARGET_SVE"
- "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
+ "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Left-to-right reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADDA
+;; -------------------------------------------------------------------------
+
;; Unpredicated in-order FP reductions.
(define_expand "fold_left_plus_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
@@ -2124,7 +3457,7 @@ (define_expand "fold_left_plus_<mode>"
}
)
-;; In-order FP reductions predicated with PTRUE.
+;; Predicated in-order FP reductions.
(define_insn "mask_fold_left_plus_<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
(unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
@@ -2150,356 +3483,233 @@ (define_insn "*pred_fold_left_plus_<mode
"fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
)
-;; Unpredicated floating-point addition.
-(define_expand "add<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (plus:SVE_F
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
+;; =========================================================================
+;; == Permutes
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] General permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TBL
+;; -------------------------------------------------------------------------
+
+(define_expand "vec_perm<mode>"
+ [(match_operand:SVE_ALL 0 "register_operand")
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_operand:SVE_ALL 2 "register_operand")
+ (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
+ "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_expand_sve_vec_perm (operands[0], operands[1],
+ operands[2], operands[3]);
+ DONE;
}
)
-;; Floating-point addition predicated with a PTRUE.
-(define_insn_and_split "*add<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (plus:SVE_F
- (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
- (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
- UNSPEC_MERGE_PTRUE))]
+(define_insn "*aarch64_sve_tbl<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
+ UNSPEC_TBL))]
"TARGET_SVE"
- "@
- fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
- #"
- ; Split the unpredicated form after reload, so that we don't have
- ; the unnecessary PTRUE.
- "&& reload_completed
- && register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
+ "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Unpredicated floating-point subtraction.
-(define_expand "sub<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (minus:SVE_F
- (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
- (match_operand:SVE_F 2 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Special-purpose unary permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - DUP
+;; - REV
+;; - REVB
+;; - REVH
+;; - REVW
+;; -------------------------------------------------------------------------
-;; Floating-point subtraction predicated with a PTRUE.
-(define_insn_and_split "*sub<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
- (minus:SVE_F
- (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
- (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Duplicate one element of a vector.
+(define_insn "*aarch64_sve_dup_lane<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (vec_duplicate:SVE_ALL
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (parallel [(match_operand:SI 2 "const_int_operand")]))))]
"TARGET_SVE
- && (register_operand (operands[2], <MODE>mode)
- || register_operand (operands[3], <MODE>mode))"
- "@
- fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
- fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
- #"
- ; Split the unpredicated form after reload, so that we don't have
- ; the unnecessary PTRUE.
- "&& reload_completed
- && register_operand (operands[2], <MODE>mode)
- && register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
+ "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
)
-;; Unpredicated floating-point multiplication.
-(define_expand "mul<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (mult:SVE_F
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Reverse the order of elements within a full vector.
+(define_insn "@aarch64_sve_rev<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
+ UNSPEC_REV))]
"TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
+ "rev\t%0.<Vetype>, %1.<Vetype>")
-;; Floating-point multiplication predicated with a PTRUE.
-(define_insn_and_split "*mul<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (mult:SVE_F
- (match_operand:SVE_F 2 "register_operand" "%0, w")
- (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
+;; Reverse the order of elements within a 64-bit container.
+(define_insn "*aarch64_sve_rev64<mode>"
+ [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
+ (unspec:SVE_BHS
+ [(match_operand:VNx2BI 1 "register_operand" "Upl")
+ (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
+ UNSPEC_REV64)]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- #"
- ; Split the unpredicated form after reload, so that we don't have
- ; the unnecessary PTRUE.
- "&& reload_completed
- && register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
+ "rev<Vesize>\t%0.d, %1/m, %2.d"
)
-;; Unpredicated floating-point binary operations (post-RA only).
-;; These are generated by splitting a predicated instruction whose
-;; predicate is unused.
-(define_insn "*post_ra_<sve_fp_op><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (SVE_UNPRED_FP_BINARY:SVE_F
- (match_operand:SVE_F 1 "register_operand" "w")
- (match_operand:SVE_F 2 "register_operand" "w")))]
- "TARGET_SVE && reload_completed"
- "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
-
-;; Unpredicated fma (%0 = (%1 * %2) + %3).
-(define_expand "fma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand"))]
+;; Reverse the order of elements within a 32-bit container.
+(define_insn "*aarch64_sve_rev32<mode>"
+ [(set (match_operand:SVE_BH 0 "register_operand" "=w")
+ (unspec:SVE_BH
+ [(match_operand:VNx4BI 1 "register_operand" "Upl")
+ (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
+ UNSPEC_REV32)]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "rev<Vesize>\t%0.s, %1/m, %2.s"
)
-;; fma predicated with a PTRUE.
-(define_insn "*fma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
+;; Reverse the order of elements within a 16-bit container.
+(define_insn "*aarch64_sve_rev16vnx16qi"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(match_operand:VNx8BI 1 "register_operand" "Upl")
+ (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
+ UNSPEC_REV16)]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ "revb\t%0.h, %1/m, %2.h"
)
-;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
-(define_expand "fnma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 1 "register_operand"))
- (match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- {
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Special-purpose binary permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TRN1
+;; - TRN2
+;; - UZP1
+;; - UZP2
+;; - ZIP1
+;; - ZIP2
+;; -------------------------------------------------------------------------
-;; fnma predicated with a PTRUE.
-(define_insn "*fnma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Permutes that take half the elements from one vector and half the
+;; elements from the other.
+(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_operand:SVE_ALL 2 "register_operand" "w")]
+ PERMUTE))]
"TARGET_SVE"
- "@
- fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Unpredicated fms (%0 = (%1 * %2) - %3).
-(define_expand "fms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")
- (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand")))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
+;; Concatenate two vectors and extract a subvector. Note that the
+;; immediate (third) operand is the lane index not the byte index.
+(define_insn "*aarch64_sve_ext<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
+ (match_operand:SVE_ALL 2 "register_operand" "w")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_EXT))]
+ "TARGET_SVE
+ && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
{
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
+ return "ext\\t%0.b, %0.b, %2.b, #%3";
}
)
-;; fms predicated with a PTRUE.
-(define_insn "*fms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (neg:SVE_F
- (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "@
- fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
-)
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Special-purpose binary permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TRN1
+;; - TRN2
+;; - UZP1
+;; - UZP2
+;; - ZIP1
+;; - ZIP2
+;; -------------------------------------------------------------------------
-;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
-(define_expand "fnms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 1 "register_operand"))
- (match_operand:SVE_F 2 "register_operand")
- (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand")))]
- UNSPEC_MERGE_PTRUE))]
+;; Permutes that take half the elements from one vector and half the
+;; elements from the other.
+(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")]
+ PERMUTE))]
"TARGET_SVE"
- {
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; fnms predicated with a PTRUE.
-(define_insn "*fnms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (neg:SVE_F
- (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "@
- fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
-)
+;; =========================================================================
+;; == Conversions
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-INT] Packs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - UZP1
+;; -------------------------------------------------------------------------
-;; Unpredicated floating-point division.
-(define_expand "div<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (div:SVE_F (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Integer pack. Use UZP1 on the narrower type, which discards
+;; the high part of each wide element.
+(define_insn "vec_pack_trunc_<Vwide>"
+ [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
+ (unspec:SVE_BHSI
+ [(match_operand:<VWIDE> 1 "register_operand" "w")
+ (match_operand:<VWIDE> 2 "register_operand" "w")]
+ UNSPEC_PACK))]
"TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Floating-point division predicated with a PTRUE.
-(define_insn "*div<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
- (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "@
- fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT<-INT] Unpacks
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SUNPKHI
+;; - SUNPKLO
+;; - UUNPKHI
+;; - UUNPKLO
+;; -------------------------------------------------------------------------
-;; Unpredicated FNEG, FABS and FSQRT.
-(define_expand "<optab><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 2)
- (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Unpack the low or high half of a vector, where "high" refers to
+;; the low-numbered lanes for big-endian and the high-numbered lanes
+;; for little-endian.
+(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
+ : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
+ (operands[0], operands[1]));
+ DONE;
}
)
-;; FNEG, FABS and FSQRT predicated with a PTRUE.
-(define_insn "*<optab><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
-)
-
-(define_insn "*fabd<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (abs:SVE_F
- (minus:SVE_F
- (match_operand:SVE_F 2 "register_operand" "0")
- (match_operand:SVE_F 3 "register_operand" "w")))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Unpredicated FRINTy.
-(define_expand "<frint_pattern><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 2)
- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
- FRINT)]
- UNSPEC_MERGE_PTRUE))]
+(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
+ UNPACK))]
"TARGET_SVE"
- {
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
)
-;; FRINTy predicated with a PTRUE.
-(define_insn "*<frint_pattern><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
- FRINT)]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVTZS
+;; - FCVTZU
+;; -------------------------------------------------------------------------
;; Unpredicated conversion of floats to integers of the same size (HF to HI,
;; SF to SI or DF to DI).
@@ -2552,6 +3762,48 @@ (define_insn "*<fix_trunc_optab>vnx2df<m
"fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
)
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Packs
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Convert two vectors of DF to SI and pack the results into a single vector.
+(define_expand "vec_pack_<su>fix_trunc_vnx2df"
+ [(set (match_dup 4)
+ (unspec:VNx4SI
+ [(match_dup 3)
+ (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))
+ (set (match_dup 5)
+ (unspec:VNx4SI
+ [(match_dup 3)
+ (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))
+ (set (match_operand:VNx4SI 0 "register_operand")
+ (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (VNx2BImode);
+ operands[4] = gen_reg_rtx (VNx4SImode);
+ operands[5] = gen_reg_rtx (VNx4SImode);
+ }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Unpacks
+;; -------------------------------------------------------------------------
+;; No patterns here yet!
+;; -------------------------------------------------------------------------
+
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SCVTF
+;; - UCVTF
+;; -------------------------------------------------------------------------
+
;; Unpredicated conversion of integers to floats of the same size
;; (HI to HF, SI to SF or DI to DF).
(define_expand "<optab><v_int_equiv><mode>2"
@@ -2604,109 +3856,17 @@ (define_insn "aarch64_sve_<optab><mode>v
"<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
)
-;; Conversion of DFs to the same number of SFs, or SFs to the same number
-;; of HFs.
-(define_insn "*trunc<Vwide><mode>2"
- [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
- (unspec:SVE_HSF
- [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
- (unspec:SVE_HSF
- [(match_operand:<VWIDE> 2 "register_operand" "w")]
- UNSPEC_FLOAT_CONVERT)]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
-)
-
-;; Conversion of SFs to the same number of DFs, or HFs to the same number
-;; of SFs.
-(define_insn "aarch64_sve_extend<mode><Vwide>2"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (unspec:<VWIDE>
- [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
- (unspec:<VWIDE>
- [(match_operand:SVE_HSF 2 "register_operand" "w")]
- UNSPEC_FLOAT_CONVERT)]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
-)
-
-;; Unpack the low or high half of a predicate, where "high" refers to
-;; the low-numbered lanes for big-endian and the high-numbered lanes
-;; for little-endian.
-(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand")
- (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
- UNPACK)]
- "TARGET_SVE"
- {
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
- : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
- (operands[0], operands[1]));
- DONE;
- }
-)
-
-;; PUNPKHI and PUNPKLO.
-(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
- (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
- UNPACK_UNSIGNED))]
- "TARGET_SVE"
- "punpk<perm_hilo>\t%0.h, %1.b"
-)
-
-;; Unpack the low or high half of a vector, where "high" refers to
-;; the low-numbered lanes for big-endian and the high-numbered lanes
-;; for little-endian.
-(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
- [(match_operand:<VWIDE> 0 "register_operand")
- (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
- "TARGET_SVE"
- {
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
- : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
- (operands[0], operands[1]));
- DONE;
- }
-)
-
-;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
-(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
- UNPACK))]
- "TARGET_SVE"
- "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
-)
-
-;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
-;; First unpack the source without conversion, then float-convert the
-;; unpacked source.
-(define_expand "vec_unpacks_<perm_hilo>_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand")
- (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
- UNPACK_UNSIGNED)]
- "TARGET_SVE"
- {
- /* Use ZIP to do the unpack, since we don't care about the upper halves
- and since it has the nice property of not needing any subregs.
- If using UUNPK* turns out to be preferable, we could model it as
- a ZIP whose first operand is zero. */
- rtx temp = gen_reg_rtx (<MODE>mode);
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_zip2<mode>
- : gen_aarch64_sve_zip1<mode>)
- (temp, operands[1], operands[1]));
- rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
- emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
- ptrue, temp));
- DONE;
- }
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Packs
+;; -------------------------------------------------------------------------
+;; No patterns here yet!
+;; -------------------------------------------------------------------------
+
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Unpacks
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
@@ -2734,29 +3894,12 @@ (define_expand "vec_unpack<su_optab>_flo
}
)
-;; Predicate pack. Use UZP1 on the narrower type, which discards
-;; the high part of each wide element.
-(define_insn "vec_pack_trunc_<Vwide>"
- [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
- (unspec:PRED_BHS
- [(match_operand:<VWIDE> 1 "register_operand" "Upa")
- (match_operand:<VWIDE> 2 "register_operand" "Upa")]
- UNSPEC_PACK))]
- "TARGET_SVE"
- "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
-)
-
-;; Integer pack. Use UZP1 on the narrower type, which discards
-;; the high part of each wide element.
-(define_insn "vec_pack_trunc_<Vwide>"
- [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
- (unspec:SVE_BHSI
- [(match_operand:<VWIDE> 1 "register_operand" "w")
- (match_operand:<VWIDE> 2 "register_operand" "w")]
- UNSPEC_PACK))]
- "TARGET_SVE"
- "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Packs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVT
+;; -------------------------------------------------------------------------
;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
;; the results into a single vector.
@@ -2783,349 +3926,114 @@ (define_expand "vec_pack_trunc_<Vwide>"
}
)
-;; Convert two vectors of DF to SI and pack the results into a single vector.
-(define_expand "vec_pack_<su>fix_trunc_vnx2df"
- [(set (match_dup 4)
- (unspec:VNx4SI
- [(match_dup 3)
- (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
- UNSPEC_MERGE_PTRUE))
- (set (match_dup 5)
- (unspec:VNx4SI
- [(match_dup 3)
- (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
- UNSPEC_MERGE_PTRUE))
- (set (match_operand:VNx4SI 0 "register_operand")
- (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (VNx2BImode);
- operands[4] = gen_reg_rtx (VNx4SImode);
- operands[5] = gen_reg_rtx (VNx4SImode);
- }
-)
-
-;; Predicated floating-point operations with select.
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand")]
- SVE_COND_FP_BINARY)
- (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
-)
-
-;; Predicated floating-point operations with select matching first operand.
-(define_insn "*cond_<optab><mode>_2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "0, w")
- (match_operand:SVE_F 3 "register_operand" "w, w")]
- SVE_COND_FP_BINARY)
- (match_dup 2)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated floating-point operations with select matching second operand.
-(define_insn "*cond_<optab><mode>_3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "register_operand" "0, w")]
- SVE_COND_FP_BINARY)
- (match_dup 3)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated floating-point binary operations in which the values of
-;; inactive lanes are distinct from the other inputs.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
- (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
- SVE_COND_FP_BINARY)
- (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
- UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])"
- "@
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- #"
- "&& reload_completed
- && register_operand (operands[4], <MODE>mode)
- && !rtx_equal_p (operands[0], operands[4])"
- {
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
- operands[4], operands[1]));
- operands[4] = operands[2] = operands[0];
- }
- [(set_attr "movprfx" "yes")]
-)
-
-;; Predicated floating-point ternary operations with select.
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand")
- (match_operand:SVE_F 4 "register_operand")]
- SVE_COND_FP_TERNARY)
- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
-{
- /* Swap the multiplication operands if the fallback value is the
- second of the two. */
- if (rtx_equal_p (operands[3], operands[5]))
- std::swap (operands[2], operands[3]);
-})
-
-;; Predicated floating-point ternary operations using the FMAD-like form.
-(define_insn "*cond_<optab><mode>_2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "0, w")
- (match_operand:SVE_F 3 "register_operand" "w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w")]
- SVE_COND_FP_TERNARY)
- (match_dup 2)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated floating-point ternary operations using the FMLA-like form.
-(define_insn "*cond_<optab><mode>_4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "register_operand" "w, w")
- (match_operand:SVE_F 4 "register_operand" "0, w")]
- SVE_COND_FP_TERNARY)
- (match_dup 4)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated floating-point ternary operations in which the value for
-;; inactive lanes is distinct from the other inputs.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "w, w, w")
- (match_operand:SVE_F 3 "register_operand" "w, w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w, w")]
- SVE_COND_FP_TERNARY)
- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
- UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[5])
- && !rtx_equal_p (operands[3], operands[5])
- && !rtx_equal_p (operands[4], operands[5])"
- "@
- movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- #"
- "&& reload_completed
- && !CONSTANT_P (operands[5])
- && !rtx_equal_p (operands[0], operands[5])"
- {
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
- operands[5], operands[1]));
- operands[5] = operands[4] = operands[0];
- }
- [(set_attr "movprfx" "yes")]
-)
-
-;; Shift an SVE vector left and insert a scalar into element 0.
-(define_insn "vec_shl_insert_<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
- (unspec:SVE_ALL
- [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
- (match_operand:<VEL> 2 "register_operand" "rZ, w")]
- UNSPEC_INSR))]
+;; Conversion of DFs to the same number of SFs, or SFs to the same number
+;; of HFs.
+(define_insn "*trunc<Vwide><mode>2"
+ [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
+ (unspec:SVE_HSF
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
+ (unspec:SVE_HSF
+ [(match_operand:<VWIDE> 2 "register_operand" "w")]
+ UNSPEC_FLOAT_CONVERT)]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- insr\t%0.<Vetype>, %<vwcore>2
- insr\t%0.<Vetype>, %<Vetype>2"
+ "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
)
-(define_expand "copysign<mode>3"
- [(match_operand:SVE_F 0 "register_operand")
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")]
- "TARGET_SVE"
- {
- rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
- rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
- rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
- int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Unpacks
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVT
+;; -------------------------------------------------------------------------
- rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
- rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
-
- emit_insn (gen_and<v_int_equiv>3
- (sign, arg2,
- aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
- HOST_WIDE_INT_M1U
- << bits)));
- emit_insn (gen_and<v_int_equiv>3
- (mant, arg1,
- aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
- ~(HOST_WIDE_INT_M1U
- << bits))));
- emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
- emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
- DONE;
- }
-)
-
-(define_expand "xorsign<mode>3"
- [(match_operand:SVE_F 0 "register_operand")
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")]
+;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
+;; First unpack the source without conversion, then float-convert the
+;; unpacked source.
+(define_expand "vec_unpacks_<perm_hilo>_<mode>"
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
+ UNPACK_UNSIGNED)]
"TARGET_SVE"
{
- rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
- rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
- int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
-
- rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
- rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
-
- emit_insn (gen_and<v_int_equiv>3
- (sign, arg2,
- aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
- HOST_WIDE_INT_M1U
- << bits)));
- emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
- emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+ /* Use ZIP to do the unpack, since we don't care about the upper halves
+ and since it has the nice property of not needing any subregs.
+ If using UUNPK* turns out to be preferable, we could model it as
+ a ZIP whose first operand is zero. */
+ rtx temp = gen_reg_rtx (<MODE>mode);
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_zip2<mode>
+ : gen_aarch64_sve_zip1<mode>)
+ (temp, operands[1], operands[1]));
+ rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
+ emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
+ ptrue, temp));
DONE;
}
)
-;; Unpredicated DOT product.
-(define_insn "<sur>dot_prod<vsi2qi>"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
- (plus:SVE_SDI
- (unspec:SVE_SDI
- [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
- (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
- DOTPROD)
- (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
+;; Conversion of SFs to the same number of DFs, or HFs to the same number
+;; of SFs.
+(define_insn "aarch64_sve_extend<mode><Vwide>2"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (unspec:<VWIDE>
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
+ (unspec:<VWIDE>
+ [(match_operand:SVE_HSF 2 "register_operand" "w")]
+ UNSPEC_FLOAT_CONVERT)]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
- movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
- [(set_attr "movprfx" "*,yes")]
+ "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
)
-;; Unpredicated integer absolute difference.
-(define_expand "<su>abd<mode>_3"
- [(use (match_operand:SVE_I 0 "register_operand"))
- (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "register_operand"))]
- "TARGET_SVE"
- {
- rtx pred = aarch64_ptrue_reg (<VPRED>mode);
- emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
- operands[2]));
- DONE;
- }
-)
+;; -------------------------------------------------------------------------
+;; ---- [PRED<-PRED] Packs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - UZP1
+;; -------------------------------------------------------------------------
-;; Predicated integer absolute difference.
-(define_insn "aarch64_<su>abd<mode>_3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (minus:SVE_I
- (USMAX:SVE_I
- (match_operand:SVE_I 2 "register_operand" "0, w")
- (match_operand:SVE_I 3 "register_operand" "w, w"))
- (<max_opp>:SVE_I
- (match_dup 2)
- (match_dup 3)))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicate pack. Use UZP1 on the narrower type, which discards
+;; the high part of each wide element.
+(define_insn "vec_pack_trunc_<Vwide>"
+ [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
+ (unspec:PRED_BHS
+ [(match_operand:<VWIDE> 1 "register_operand" "Upa")
+ (match_operand:<VWIDE> 2 "register_operand" "Upa")]
+ UNSPEC_PACK))]
"TARGET_SVE"
- "@
- <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
+ "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
-;; operands 1 and 2. The sequence also has to perform a widening reduction of
-;; the difference into a vector and accumulate that into operand 3 before
-;; copying that into the result operand 0.
-;; Perform that with a sequence of:
-;; MOV ones.b, #1
-;; [SU]ABD diff.b, p0/m, op1.b, op2.b
-;; MOVPRFX op0, op3 // If necessary
-;; UDOT op0.s, diff.b, ones.b
+;; -------------------------------------------------------------------------
+;; ---- [PRED<-PRED] Unpacks
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PUNPKHI
+;; - PUNPKLO
+;; -------------------------------------------------------------------------
-(define_expand "<sur>sad<vsi2qi>"
- [(use (match_operand:SVE_SDI 0 "register_operand"))
- (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
- (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
- (use (match_operand:SVE_SDI 3 "register_operand"))]
+;; Unpack the low or high half of a predicate, where "high" refers to
+;; the low-numbered lanes for big-endian and the high-numbered lanes
+;; for little-endian.
+(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
+ UNPACK)]
"TARGET_SVE"
{
- rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
- rtx diff = gen_reg_rtx (<VSI2QI>mode);
- emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
- emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
+ : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
+ (operands[0], operands[1]));
DONE;
}
)
-;; Standard pattern name vec_init<mode><Vel>.
-(define_expand "vec_init<mode><Vel>"
- [(match_operand:SVE_ALL 0 "register_operand")
- (match_operand 1 "" "")]
+(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
+ (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
+ UNPACK_UNSIGNED))]
"TARGET_SVE"
- {
- aarch64_sve_expand_vector_init (operands[0], operands[1]);
- DONE;
- }
+ "punpk<perm_hilo>\t%0.h, %1.b"
)
===================================================================
@@ -0,0 +1,66 @@
+#!/usr/bin/awk -f
+# Copyright (C) 2019 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# This awk script checks that aarch64-sve.md (passed either on the
+# command line or via stdin) has an up-to-date contents section.
+
+BEGIN {
+ seen1 = 0
+ seen2 = 0
+ errors = 0
+}
+
+# The headings in the comments use a two-level hierarchy: ";; == ..."
+# for major sections and ";; ---- ..." for minor sections. Each section
+# heading must be unique.
+#
+# The contents section should list all the section headings, using the
+# same text and in the same order. We should therefore see exactly two
+# copies of the section list.
+/^;; == / || /^;; ---- / {
+ if ($0 in seen || seen2 > 0)
+ {
+ if (seen2 >= seen1)
+ {
+ printf "error: line not in contents: %s\n", $0 > "/dev/stderr"
+ errors += 1
+ exit(1)
+ }
+ if ($0 != order[seen2])
+ {
+ printf "error: mismatched contents\n saw: %s\nexpected: %s\n", \
+ $0, order[seen2] > "/dev/stderr"
+ errors += 1
+ exit(1)
+ }
+ seen2 += 1
+ }
+ else
+ {
+ seen[$0] = 1
+ order[seen1] = $0
+ seen1 += 1
+ }
+}
+
+END {
+ if (seen2 < seen1 && errors == 0)
+ {
+ printf "error: line only in contents: %s\n", order[seen2] > "/dev/stderr"
+ exit(1)
+ }
+}
===================================================================
@@ -103,3 +103,10 @@ aarch64-bti-insert.o: $(srcdir)/config/a
comma=,
MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG))))
MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG))
+
+insn-conditions.md: s-check-sve-md
+s-check-sve-md: $(srcdir)/config/aarch64/check-sve-md.awk \
+ $(srcdir)/config/aarch64/aarch64-sve.md
+ $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \
+ $(srcdir)/config/aarch64/aarch64-sve.md
+ $(STAMP) s-check-sve-md