===================================================================
@@ -476,8 +476,9 @@ bool aarch64_zero_extend_const_eq (machi
bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
opt_machine_mode aarch64_sve_pred_mode (unsigned int);
bool aarch64_sve_cnt_immediate_p (rtx);
+bool aarch64_sve_scalar_inc_dec_immediate_p (rtx);
bool aarch64_sve_addvl_addpl_immediate_p (rtx);
-bool aarch64_sve_inc_dec_immediate_p (rtx);
+bool aarch64_sve_vector_inc_dec_immediate_p (rtx);
int aarch64_add_offset_temporaries (rtx);
void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx);
bool aarch64_mov_operand_p (rtx, machine_mode);
@@ -485,8 +486,9 @@ rtx aarch64_reverse_mask (machine_mode,
bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
bool aarch64_offset_9bit_signed_unscaled_p (machine_mode, poly_int64);
char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx);
-char *aarch64_output_sve_addvl_addpl (rtx, rtx, rtx);
-char *aarch64_output_sve_inc_dec_immediate (const char *, rtx);
+char *aarch64_output_sve_scalar_inc_dec (rtx);
+char *aarch64_output_sve_addvl_addpl (rtx);
+char *aarch64_output_sve_vector_inc_dec (const char *, rtx);
char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
char *aarch64_output_simd_mov_immediate (rtx, unsigned,
enum simd_immediate_check w = AARCH64_CHECK_MOV);
===================================================================
@@ -2950,6 +2950,33 @@ aarch64_output_sve_cnt_immediate (const
value.coeffs[1], 0);
}
+/* Return true if we can add X using a single SVE INC or DEC instruction. */
+
+bool
+aarch64_sve_scalar_inc_dec_immediate_p (rtx x)
+{
+ poly_int64 value;
+ return (poly_int_rtx_p (x, &value)
+ && (aarch64_sve_cnt_immediate_p (value)
+ || aarch64_sve_cnt_immediate_p (-value)));
+}
+
+/* Return the asm string for adding SVE INC/DEC immediate OFFSET to
+ operand 0. */
+
+char *
+aarch64_output_sve_scalar_inc_dec (rtx offset)
+{
+ poly_int64 offset_value = rtx_to_poly_int64 (offset);
+ gcc_assert (offset_value.coeffs[0] == offset_value.coeffs[1]);
+ if (offset_value.coeffs[1] > 0)
+ return aarch64_output_sve_cnt_immediate ("inc", "%x0",
+ offset_value.coeffs[1], 0);
+ else
+ return aarch64_output_sve_cnt_immediate ("dec", "%x0",
+ -offset_value.coeffs[1], 0);
+}
+
/* Return true if we can add VALUE to a register using a single ADDVL
or ADDPL instruction. */
@@ -2975,27 +3002,16 @@ aarch64_sve_addvl_addpl_immediate_p (rtx
&& aarch64_sve_addvl_addpl_immediate_p (value));
}
-/* Return the asm string for adding ADDVL or ADDPL immediate X to operand 1
- and storing the result in operand 0. */
+/* Return the asm string for adding ADDVL or ADDPL immediate OFFSET
+ to operand 1 and storing the result in operand 0. */
char *
-aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset)
+aarch64_output_sve_addvl_addpl (rtx offset)
{
static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
poly_int64 offset_value = rtx_to_poly_int64 (offset);
gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
- /* Use INC or DEC if possible. */
- if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest)))
- {
- if (aarch64_sve_cnt_immediate_p (offset_value))
- return aarch64_output_sve_cnt_immediate ("inc", "%x0",
- offset_value.coeffs[1], 0);
- if (aarch64_sve_cnt_immediate_p (-offset_value))
- return aarch64_output_sve_cnt_immediate ("dec", "%x0",
- -offset_value.coeffs[1], 0);
- }
-
int factor = offset_value.coeffs[1];
if ((factor & 15) == 0)
snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
@@ -3010,8 +3026,8 @@ aarch64_output_sve_addvl_addpl (rtx dest
factor in *FACTOR_OUT (if nonnull). */
bool
-aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
- unsigned int *nelts_per_vq_out)
+aarch64_sve_vector_inc_dec_immediate_p (rtx x, int *factor_out,
+ unsigned int *nelts_per_vq_out)
{
rtx elt;
poly_int64 value;
@@ -3045,9 +3061,9 @@ aarch64_sve_inc_dec_immediate_p (rtx x,
instruction. */
bool
-aarch64_sve_inc_dec_immediate_p (rtx x)
+aarch64_sve_vector_inc_dec_immediate_p (rtx x)
{
- return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL);
+ return aarch64_sve_vector_inc_dec_immediate_p (x, NULL, NULL);
}
/* Return the asm template for an SVE vector INC or DEC instruction.
@@ -3055,11 +3071,11 @@ aarch64_sve_inc_dec_immediate_p (rtx x)
value of the vector count operand itself. */
char *
-aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x)
+aarch64_output_sve_vector_inc_dec (const char *operands, rtx x)
{
int factor;
unsigned int nelts_per_vq;
- if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
+ if (!aarch64_sve_vector_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
gcc_unreachable ();
if (factor < 0)
return aarch64_output_sve_cnt_immediate ("dec", operands, -factor,
===================================================================
@@ -144,10 +144,18 @@ (define_predicate "aarch64_pluslong_stri
(and (match_operand 0 "aarch64_pluslong_immediate")
(not (match_operand 0 "aarch64_plus_immediate"))))
+(define_predicate "aarch64_sve_scalar_inc_dec_immediate"
+ (and (match_code "const_poly_int")
+ (match_test "aarch64_sve_scalar_inc_dec_immediate_p (op)")))
+
(define_predicate "aarch64_sve_addvl_addpl_immediate"
(and (match_code "const_poly_int")
(match_test "aarch64_sve_addvl_addpl_immediate_p (op)")))
+(define_predicate "aarch64_sve_plus_immediate"
+ (ior (match_operand 0 "aarch64_sve_scalar_inc_dec_immediate")
+ (match_operand 0 "aarch64_sve_addvl_addpl_immediate")))
+
(define_predicate "aarch64_split_add_offset_immediate"
(and (match_code "const_poly_int")
(match_test "aarch64_add_offset_temporaries (op) == 1")))
@@ -155,7 +163,8 @@ (define_predicate "aarch64_split_add_off
(define_predicate "aarch64_pluslong_operand"
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_pluslong_immediate")
- (match_operand 0 "aarch64_sve_addvl_addpl_immediate")))
+ (and (match_test "TARGET_SVE")
+ (match_operand 0 "aarch64_sve_plus_immediate"))))
(define_predicate "aarch64_pluslong_or_poly_operand"
(ior (match_operand 0 "aarch64_pluslong_operand")
@@ -602,9 +611,9 @@ (define_predicate "aarch64_sve_sub_arith
(and (match_code "const,const_vector")
(match_test "aarch64_sve_arith_immediate_p (op, true)")))
-(define_predicate "aarch64_sve_inc_dec_immediate"
+(define_predicate "aarch64_sve_vector_inc_dec_immediate"
(and (match_code "const,const_vector")
- (match_test "aarch64_sve_inc_dec_immediate_p (op)")))
+ (match_test "aarch64_sve_vector_inc_dec_immediate_p (op)")))
(define_predicate "aarch64_sve_uxtb_immediate"
(and (match_code "const_vector")
@@ -687,7 +696,7 @@ (define_predicate "aarch64_sve_arith_ope
(define_predicate "aarch64_sve_add_operand"
(ior (match_operand 0 "aarch64_sve_arith_operand")
(match_operand 0 "aarch64_sve_sub_arith_immediate")
- (match_operand 0 "aarch64_sve_inc_dec_immediate")))
+ (match_operand 0 "aarch64_sve_vector_inc_dec_immediate")))
(define_predicate "aarch64_sve_pred_and_operand"
(ior (match_operand 0 "register_operand")
===================================================================
@@ -49,6 +49,12 @@ (define_constraint "Uaa"
(and (match_code "const_int")
(match_test "aarch64_pluslong_strict_immedate (op, VOIDmode)")))
+(define_constraint "Uai"
+ "@internal
+ A constraint that matches a VG-based constant that can be added by
+ a single INC or DEC."
+ (match_operand 0 "aarch64_sve_scalar_inc_dec_immediate"))
+
(define_constraint "Uav"
"@internal
A constraint that matches a VG-based constant that can be added by
@@ -416,7 +422,7 @@ (define_constraint "vsi"
"@internal
A constraint that matches a vector count operand valid for SVE INC and
DEC instructions."
- (match_operand 0 "aarch64_sve_inc_dec_immediate"))
+ (match_operand 0 "aarch64_sve_vector_inc_dec_immediate"))
(define_constraint "vsn"
"@internal
===================================================================
@@ -1753,6 +1753,7 @@ (define_expand "add<mode>3"
/* If the constant is too large for a single instruction and isn't frame
based, split off the immediate so it is available for CSE. */
if (!aarch64_plus_immediate (operands[2], <MODE>mode)
+ && !(TARGET_SVE && aarch64_sve_plus_immediate (operands[2], <MODE>mode))
&& can_create_pseudo_p ()
&& (!REG_P (op1)
|| !REGNO_PTR_FRAME_P (REGNO (op1))))
@@ -1770,10 +1771,10 @@ (define_expand "add<mode>3"
(define_insn "*add<mode>3_aarch64"
[(set
- (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,rk")
+ (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk")
(plus:GPI
- (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,rk")
- (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uav")))]
+ (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk")
+ (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))]
""
"@
add\\t%<w>0, %<w>1, %2
@@ -1781,10 +1782,11 @@ (define_insn "*add<mode>3_aarch64"
add\\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
sub\\t%<w>0, %<w>1, #%n2
#
- * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]);"
- ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder.
- [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm")
- (set_attr "arch" "*,*,simd,*,*,*")]
+ * return aarch64_output_sve_scalar_inc_dec (operands[2]);
+ * return aarch64_output_sve_addvl_addpl (operands[2]);"
+ ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders.
+ [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm")
+ (set_attr "arch" "*,*,simd,*,*,sve,sve")]
)
;; zero_extend version of above
@@ -1863,17 +1865,18 @@ (define_split
;; this pattern.
(define_insn_and_split "*add<mode>3_poly_1"
[(set
- (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,&r")
+ (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,r,&r")
(plus:GPI
- (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,rk,rk")
- (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uav,Uat")))]
+ (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,0,rk,rk")
+ (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uai,Uav,Uat")))]
"TARGET_SVE && operands[0] != stack_pointer_rtx"
"@
add\\t%<w>0, %<w>1, %2
add\\t%<w>0, %<w>1, %<w>2
sub\\t%<w>0, %<w>1, #%n2
#
- * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]);
+ * return aarch64_output_sve_scalar_inc_dec (operands[2]);
+ * return aarch64_output_sve_addvl_addpl (operands[2]);
#"
"&& epilogue_completed
&& !reg_overlap_mentioned_p (operands[0], operands[1])
@@ -1884,8 +1887,8 @@ (define_insn_and_split "*add<mode>3_poly
operands[2], operands[0], NULL_RTX);
DONE;
}
- ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder.
- [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,multiple")]
+ ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders.
+ [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,alu_imm,multiple")]
)
(define_split
===================================================================
@@ -1971,7 +1971,7 @@ (define_insn "add<mode>3"
"@
add\t%0.<Vetype>, %0.<Vetype>, #%D2
sub\t%0.<Vetype>, %0.<Vetype>, #%N2
- * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
+ * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"