===================================================================
@@ -2546,6 +2546,36 @@ aarch64_zero_extend_const_eq (machine_mo
}
+/* Return TARGET if it is nonnull and a register of mode MODE.
+ Otherwise, return a fresh register of mode MODE if we can,
+ or TARGET reinterpreted as MODE if we can't. */
+
+static rtx
+aarch64_target_reg (rtx target, machine_mode mode)
+{
+ if (target && REG_P (target) && GET_MODE (target) == mode)
+ return target;
+ if (!can_create_pseudo_p ())
+ {
+ gcc_assert (target);
+ return gen_lowpart (mode, target);
+ }
+ return gen_reg_rtx (mode);
+}
+
+/* Return a register that contains the constant in BUILDER, given that
+ the constant is a legitimate move operand. Use TARGET as the register
+ if it is nonnull and convenient. */
+
+static rtx
+aarch64_emit_set_immediate (rtx target, rtx_vector_builder &builder)
+{
+ rtx src = builder.build ();
+ target = aarch64_target_reg (target, GET_MODE (src));
+ emit_insn (gen_rtx_SET (target, src));
+ return target;
+}
+
static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
@@ -2721,7 +2751,8 @@ aarch64_ptrue_all (unsigned int elt_size
aarch64_ptrue_reg (machine_mode mode)
{
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
- return force_reg (mode, CONSTM1_RTX (mode));
+ rtx reg = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
+ return gen_lowpart (mode, reg);
}
/* Return an all-false predicate register of mode MODE. */
@@ -2730,7 +2761,26 @@ aarch64_ptrue_reg (machine_mode mode)
aarch64_pfalse_reg (machine_mode mode)
{
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
- return force_reg (mode, CONST0_RTX (mode));
+ rtx reg = force_reg (VNx16BImode, CONST0_RTX (VNx16BImode));
+ return gen_lowpart (mode, reg);
+}
+
+/* Use a comparison to convert integer vector SRC into MODE, which is
+ the corresponding SVE predicate mode. Use TARGET for the result
+ if it's nonnull and convenient. */
+
+static rtx
+aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
+{
+ machine_mode src_mode = GET_MODE (src);
+ insn_code icode = code_for_aarch64_pred_cmp (NE, src_mode);
+ expand_operand ops[4];
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], CONSTM1_RTX (mode), mode);
+ create_input_operand (&ops[2], src, src_mode);
+ create_input_operand (&ops[3], CONST0_RTX (src_mode), src_mode);
+ expand_insn (icode, 4, ops);
+ return ops[0].value;
}
/* Return true if we can move VALUE into a register using a single
@@ -3633,15 +3683,80 @@ aarch64_expand_sve_const_vector (rtx tar
return target;
}
-/* Use WHILE to set predicate register DEST so that the first VL bits
- are set and the rest are clear. */
+/* Use WHILE to set a predicate register of mode MODE in which the first
+ VL bits are set and the rest are clear. Use TARGET for the register
+ if it's nonnull and convenient. */
-static void
-aarch64_sve_move_pred_via_while (rtx dest, unsigned int vl)
+static rtx
+aarch64_sve_move_pred_via_while (rtx target, machine_mode mode,
+ unsigned int vl)
{
rtx limit = force_reg (DImode, gen_int_mode (vl, DImode));
- emit_insn (gen_while_ult (DImode, GET_MODE (dest),
- dest, const0_rtx, limit));
+ target = aarch64_target_reg (target, mode);
+ emit_insn (gen_while_ult (DImode, mode, target, const0_rtx, limit));
+ return target;
+}
+
+/* Subroutine of aarch64_expand_sve_const_pred. Try to load the VNx16BI
+ constant in BUILDER into an SVE predicate register. Return the register
+ on success, otherwise return null. Use TARGET for the register if
+ nonnull and convenient. */
+
+static rtx
+aarch64_expand_sve_const_pred_1 (rtx target, rtx_vector_builder &builder)
+{
+ if (builder.encoded_nelts () == 1)
+ /* A PFALSE or a PTRUE .B ALL. */
+ return aarch64_emit_set_immediate (target, builder);
+
+ unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
+ if (int vl = aarch64_partial_ptrue_length (builder, elt_size))
+ {
+ /* If we can load the constant using PTRUE, use it as-is. */
+ machine_mode mode = aarch64_sve_pred_mode (elt_size).require ();
+ if (aarch64_svpattern_for_vl (mode, vl) != AARCH64_NUM_SVPATTERNS)
+ return aarch64_emit_set_immediate (target, builder);
+
+ /* Otherwise use WHILE to set the first VL bits. */
+ return aarch64_sve_move_pred_via_while (target, mode, vl);
+ }
+
+ return NULL_RTX;
+}
+
+/* Return an SVE predicate register that contains the VNx16BImode
+ constant in BUILDER, without going through the move expanders.
+
+ The returned register can have whatever mode seems most natural
+ given the contents of BUILDER. Use TARGET for the result if
+ convenient. */
+
+static rtx
+aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder)
+{
+ /* Try loading the constant using pure predicate operations. */
+ if (rtx res = aarch64_expand_sve_const_pred_1 (target, builder))
+ return res;
+
+ /* Try forcing the constant to memory. */
+ if (builder.full_nelts ().is_constant ())
+ if (rtx mem = force_const_mem (VNx16BImode, builder.build ()))
+ {
+ target = aarch64_target_reg (target, VNx16BImode);
+ emit_move_insn (target, mem);
+ return target;
+ }
+
+ /* The last resort is to load the constant as an integer and then
+ compare it against zero. Use -1 for set bits in order to increase
+ the chances of using SVE DUPM or an Advanced SIMD byte mask. */
+ rtx_vector_builder int_builder (VNx16QImode, builder.npatterns (),
+ builder.nelts_per_pattern ());
+ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
+ int_builder.quick_push (INTVAL (builder.elt (i))
+ ? constm1_rtx : const0_rtx);
+ return aarch64_convert_sve_data_to_pred (target, VNx16BImode,
+ int_builder.build ());
}
/* Set DEST to immediate IMM. */
@@ -3770,6 +3885,32 @@ aarch64_expand_mov_immediate (rtx dest,
if (!CONST_INT_P (imm))
{
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ {
+ /* Only the low bit of each .H, .S and .D element is defined,
+ so we can set the upper bits to whatever we like. If the
+ predicate is all-true in MODE, prefer to set all the undefined
+ bits as well, so that we can share a single .B predicate for
+ all modes. */
+ if (imm == CONSTM1_RTX (mode))
+ imm = CONSTM1_RTX (VNx16BImode);
+
+ /* All methods for constructing predicate modes wider than VNx16BI
+ will set the upper bits of each element to zero. Expose this
+ by moving such constants as a VNx16BI, so that all bits are
+ significant and so that constants for different modes can be
+ shared. The wider constant will still be available as a
+ REG_EQUAL note. */
+ rtx_vector_builder builder;
+ if (aarch64_get_sve_pred_bits (builder, imm))
+ {
+ rtx res = aarch64_expand_sve_const_pred (dest, builder);
+ if (dest != res)
+ emit_move_insn (dest, gen_lowpart (mode, res));
+ return;
+ }
+ }
+
if (GET_CODE (imm) == HIGH
|| aarch64_simd_valid_immediate (imm, NULL))
{
@@ -3777,19 +3918,6 @@ aarch64_expand_mov_immediate (rtx dest,
return;
}
- rtx_vector_builder builder;
- if (GET_MODE_CLASS (GET_MODE (imm)) == MODE_VECTOR_BOOL
- && aarch64_get_sve_pred_bits (builder, imm))
- {
- unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder);
- int vl = aarch64_partial_ptrue_length (builder, elt_size);
- if (vl > 0)
- {
- aarch64_sve_move_pred_via_while (dest, vl);
- return;
- }
- }
-
if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode))
if (rtx res = aarch64_expand_sve_const_vector (dest, imm))
{
@@ -15178,7 +15306,17 @@ aarch64_mov_operand_p (rtx x, machine_mo
return true;
if (VECTOR_MODE_P (GET_MODE (x)))
- return aarch64_simd_valid_immediate (x, NULL);
+ {
+ /* Require predicate constants to be VNx16BI before RA, so that we
+ force everything to have a canonical form. */
+ if (!lra_in_progress
+ && !reload_completed
+ && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL
+ && GET_MODE (x) != VNx16BImode)
+ return false;
+
+ return aarch64_simd_valid_immediate (x, NULL);
+ }
if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
return true;
===================================================================
@@ -846,7 +846,7 @@ (define_expand "vec_duplicate<mode>"
[(set (match_operand:SVE_ALL 0 "register_operand")
(vec_duplicate:SVE_ALL
(match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
- (clobber (scratch:<VPRED>))])]
+ (clobber (scratch:VNx16BI))])]
"TARGET_SVE"
{
if (MEM_P (operands[1]))
@@ -867,7 +867,7 @@ (define_insn_and_split "*vec_duplicate<m
[(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
(vec_duplicate:SVE_ALL
(match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
- (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
+ (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
"TARGET_SVE"
"@
mov\t%0.<Vetype>, %<vwcore>1
@@ -877,9 +877,10 @@ (define_insn_and_split "*vec_duplicate<m
[(const_int 0)]
{
if (GET_CODE (operands[2]) == SCRATCH)
- operands[2] = gen_reg_rtx (<VPRED>mode);
- emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
- emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
+ operands[2] = gen_reg_rtx (VNx16BImode);
+ emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
+ rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
+ emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
CONST0_RTX (<MODE>mode)));
DONE;
}
@@ -2971,7 +2972,7 @@ (define_insn_and_split "*pred_cmp<cmp_op
)
;; Predicated integer comparisons.
-(define_insn "*pred_cmp<cmp_op><mode>"
+(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
(and:<VPRED>
(SVE_INT_CMP:<VPRED>
===================================================================
@@ -24,9 +24,10 @@ TEST_LOOP (uint16_t, 0x1234);
TEST_LOOP (uint32_t, 0x12345);
TEST_LOOP (uint64_t, 0x123456);
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.b,} 6 } } */
/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.h,} } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s,} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d,} 3 } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.s,} } } */
+/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.d,} } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, w[0-9]+\n} 3 } } */
/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s,} 3 } } */
/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d,} 3 } } */
===================================================================
@@ -40,10 +40,7 @@ TEST_LOOP (double, 3.0)
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl16\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl8\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl4\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 } } */
/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
===================================================================
@@ -16,10 +16,7 @@ #define N 64
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl32\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl16\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl8\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 } } */
/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
===================================================================
@@ -16,10 +16,7 @@ #define N 128
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl64\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl32\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl16\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 } } */
/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
===================================================================
@@ -16,10 +16,7 @@ #define N 256
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl128\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl64\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl32\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 } } */
/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */