[SVE ACLE] Fix @vcond_mask_<mode><vpred> patterns
This started as a fix for a typo in @vcond_mask_<mode><vpred>:
movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
Operand 1 here is a vector, not a scalar, so it should be referenced
using %1.<Vetype> rather than %<Vetype>1. This was causing several
execution failures on the branch.
But the vector form of MOV /M is an alias of SEL, and SEL does not
accept a MOVPRFX prefix, so just changing the operand syntax wouldn't
give a valid alternative.
The patch therefore drops the alternative instead.  To keep the pre-RA
instruction selection equally tight, the insn condition now rejects the
operand combination (register "true" value, non-register "false" value)
that the dropped alternative used to handle.
Also, I'd originally thought Dn was the right choice of constraint
for the float immediate patterns, but that includes DUPM constants too,
which can't be predicated. The patch therefore tightens the predicates
and constraints to only accept things accepted by CPY and FCPY, and makes
sure that "movprfx" is only set to "yes" for alternatives that need it.
@@ -410,7 +410,7 @@ char *aarch64_output_sve_inc_dec_immediate (const char *, rtx);
char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
char *aarch64_output_simd_mov_immediate (rtx, unsigned,
enum simd_immediate_check w = AARCH64_CHECK_MOV);
-char *aarch64_output_sve_mov_immediate (rtx, int = -1, bool = false);
+char *aarch64_output_sve_mov_immediate (rtx);
bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
bool aarch64_regno_ok_for_base_p (int, bool);
bool aarch64_regno_ok_for_index_p (int, bool);
@@ -1903,37 +1903,39 @@
;; vcond_mask operand order: true, false, mask
;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
;; SEL operand order: mask, true, false
-(define_insn "@vcond_mask_<mode><vpred>"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upl, Upa, Upl")
- (match_operand:SVE_I 1 "aarch64_sve_dup_reg_or_imm" "w, vss, w, vss, vss")
- (match_operand:SVE_I 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, Dz, w")]
+(define_expand "@vcond_mask_<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 3 "register_operand")
+ (match_operand:SVE_ALL 1 "aarch64_sve_dup_reg_or_imm")
+ (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE"
- "@
- sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
- mov\t%0.<Vetype>, %3/m, #%1
- movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %1.<Vetype>
- mov\t%0.<Vetype>, %3/z, #%1
- movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%1"
- [(set_attr "movprfx" "*,*,yes,*,yes")]
+ {
+ if (register_operand (operands[1], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ }
)
-(define_insn "@vcond_mask_<mode><vpred>"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 3 "register_operand" "Upa, Upl, Upl, Upl")
- (match_operand:SVE_F 1 "aarch64_nonmemory_operand" "w, Dn, w, Dn")
- (match_operand:SVE_F 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, Dz")]
+(define_insn "*vcond_mask_<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
+ (match_operand:SVE_ALL 1 "aarch64_sve_dup_reg_or_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
+ (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
UNSPEC_SEL))]
- "TARGET_SVE"
+ "TARGET_SVE
+ && (!register_operand (operands[1], <MODE>mode)
+ || register_operand (operands[2], <MODE>mode))"
"@
sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
- * return aarch64_output_sve_mov_immediate (operands[1], 3, true);
- movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
- * return aarch64_output_sve_mov_immediate (operands[1], 3, false);"
- [(set_attr "movprfx" "*,yes,yes,yes")]
+ mov\t%0.<Vetype>, %3/m, #%I1
+ mov\t%0.<Vetype>, %3/z, #%I1
+ fmov\t%0.<Vetype>, %3/m, #%1
+ movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
+ movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
+ movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
+ [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
)
(define_insn "@aarch64_sel_dup<mode>"
@@ -1171,6 +1171,16 @@ aarch64_dbx_register_number (unsigned regno)
return DWARF_FRAME_REGISTERS;
}
+/* If X is a CONST_DOUBLE, return its bit representation as a constant
+ integer, otherwise return X unmodified. */
+static rtx
+aarch64_bit_representation (rtx x)
+{
+ if (CONST_DOUBLE_P (x))
+ x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
+ return x;
+}
+
/* Return true if MODE is any of the Advanced SIMD structure modes. */
static bool
aarch64_advsimd_struct_mode_p (machine_mode mode)
@@ -6562,7 +6572,8 @@ aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
if (negate)
r = real_value_negate (&r);
- /* We only handle the SVE single-bit immediates here. */
+ /* Handle the SVE single-bit immediates specially, since they have a
+ fixed form in the assembly syntax. */
if (real_equal (&r, &dconst0))
asm_fprintf (f, "0.0");
else if (real_equal (&r, &dconst2))
@@ -6572,7 +6583,13 @@ aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
else if (real_equal (&r, &dconsthalf))
asm_fprintf (f, "0.5");
else
- return false;
+ {
+ const int buf_size = 20;
+ char float_buf[buf_size] = {'\0'};
+ real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size,
+ 1, GET_MODE (elt));
+ asm_fprintf (f, "%s", float_buf);
+ }
return true;
}
@@ -6601,6 +6618,11 @@ sizetochar (int size)
and print it as an unsigned integer, in decimal.
'e': Print the sign/zero-extend size as a character 8->b,
16->h, 32->w.
+ 'I': If the operand is a duplicated vector constant,
+ replace it with the duplicated scalar. If the
+ operand is then a floating-point constant, replace
+ it with the integer bit representation. Print the
+ transformed constant as a signed decimal number.
'p': Prints N such that 2^N == X (X must be power of 2 and
const int).
'P': Print the number of non-zero bits in X (a const_int).
@@ -6727,6 +6749,19 @@ aarch64_print_operand (FILE *f, rtx x, int code)
asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
break;
+ case 'I':
+ {
+ x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
+ if (CONST_INT_P (x))
+ asm_fprintf (f, "%wd", INTVAL (x));
+ else
+ {
+ output_operand_lossage ("invalid operand for '%%%c'", code);
+ return;
+ }
+ break;
+ }
+
case 'M':
case 'm':
{
@@ -13028,13 +13063,11 @@ aarch64_sve_bitmask_immediate_p (rtx x)
bool
aarch64_sve_dup_immediate_p (rtx x)
{
- rtx elt;
-
- if (!const_vec_duplicate_p (x, &elt)
- || !CONST_INT_P (elt))
+ x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
+ if (!CONST_INT_P (x))
return false;
- HOST_WIDE_INT val = INTVAL (elt);
+ HOST_WIDE_INT val = INTVAL (x);
if (val & 0xff)
return IN_RANGE (val, -0x80, 0x7f);
return IN_RANGE (val, -0x8000, 0x7f00);
@@ -14676,6 +14709,7 @@ aarch64_float_const_representable_p (rtx x)
REAL_VALUE_TYPE r, m;
bool fail;
+ x = unwrap_const_vec_duplicate (x);
if (!CONST_DOUBLE_P (x))
return false;
@@ -14852,15 +14886,12 @@ aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
}
/* Return the output string to use for moving immediate CONST_VECTOR
- into an SVE register. If the move is predicated, PRED_REG is the
- number of the operand that contains the predicate register,
- otherwise it is -1. MERGE_P is true if a predicated move should
- use merge predication rather than zero predication. */
+ into an SVE register. */
char *
-aarch64_output_sve_mov_immediate (rtx const_vector, int pred_reg, bool merge_p)
+aarch64_output_sve_mov_immediate (rtx const_vector)
{
- static char templ[60];
+ static char templ[40];
struct simd_immediate_info info;
char element_char;
@@ -14904,40 +14935,14 @@ aarch64_output_sve_mov_immediate (rtx const_vector, int pred_reg, bool merge_p)
CONST_DOUBLE_REAL_VALUE (info.value),
buf_size, buf_size, 1, info.elt_mode);
- if (pred_reg == -1)
- snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
- element_char, float_buf);
- else
- {
- if (merge_p)
- snprintf (templ, sizeof (templ), "fmov\t%%0.%c, %%%d/m, #%s",
- element_char,
- pred_reg,
- float_buf);
- else
- snprintf (templ, sizeof (templ),
- "movprfx\t%%0.%c, %%%d/z, %%0.%c\n"
- "\tfmov\t%%0.%c, %%%d/m, #%s",
- element_char,
- pred_reg,
- element_char,
- element_char,
- pred_reg,
- float_buf);
- }
+ snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
+ element_char, float_buf);
return templ;
}
}
- if (pred_reg == -1)
- snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
- element_char, INTVAL (info.value));
- else
- snprintf (templ, sizeof (templ),
- "mov\t%%0.%c, %%%d%s, #" HOST_WIDE_INT_PRINT_DEC,
- element_char,
- pred_reg, merge_p ? "/m" : "/z",
- INTVAL (info.value));
+ snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
+ element_char, INTVAL (info.value));
return templ;
}
@@ -278,7 +278,7 @@
(define_constraint "Ufc"
"A floating point constant which can be used with an\
FMOV immediate operation."
- (and (match_code "const_double")
+ (and (match_code "const_double,const_vector")
(match_test "aarch64_float_const_representable_p (op)")))
(define_constraint "Uvi"
@@ -543,7 +543,8 @@
(define_predicate "aarch64_sve_dup_immediate"
(and (match_code "const,const_vector")
- (match_test "aarch64_sve_dup_immediate_p (op)")))
+ (ior (match_test "aarch64_sve_dup_immediate_p (op)")
+ (match_test "aarch64_float_const_representable_p (op)"))))
(define_predicate "aarch64_sve_dup_reg_or_imm"
(ior (match_operand 0 "register_operand")
@@ -70,7 +70,7 @@ TEST_UNIFORM_ZS (dup_h0_f16, svfloat16_t, __fp16,
/*
** dup_1_f16_m:
-** fmov z0\.h, p0/m, #1.0(e\+0)?
+** mov z0\.h, p0/m, #15360
** ret
*/
TEST_UNIFORM_Z (dup_1_f16_m, svfloat16_t,
@@ -88,7 +88,7 @@ TEST_UNIFORM_Z (dup_0_f16_m, svfloat16_t,
/*
** dup_8_f16_m:
-** fmov z0\.h, p0/m, #8.0(e\+0)?
+** mov z0\.h, p0/m, #18432
** ret
*/
TEST_UNIFORM_Z (dup_8_f16_m, svfloat16_t,
@@ -132,8 +132,7 @@ TEST_UNIFORM_ZS (dup_h0_f16_m, svfloat16_t, __fp16,
/*
** dup_1_f16_z:
-** movprfx z0\.h, p0/z, z0\.h
-** fmov z0\.h, p0/m, #1.0(e\+0)?
+** mov z0\.h, p0/z, #15360
** ret
*/
TEST_UNIFORM_Z (dup_1_f16_z, svfloat16_t,
@@ -151,8 +150,7 @@ TEST_UNIFORM_Z (dup_0_f16_z, svfloat16_t,
/*
** dup_8_f16_z:
-** movprfx z0\.h, p0/z, z0\.h
-** fmov z0\.h, p0/m, #8.0(e\+0)?
+** mov z0\.h, p0/z, #18432
** ret
*/
TEST_UNIFORM_Z (dup_8_f16_z, svfloat16_t,
@@ -631,9 +631,9 @@ TEST_UNIFORM_Z (dup_127_s16_z, svint16_t,
/*
** dup_128_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #128
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_128_s16_z, svint16_t,
@@ -652,9 +652,9 @@ TEST_UNIFORM_Z (dup_253_s16_z, svint16_t,
/*
** dup_254_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #254
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_254_s16_z, svint16_t,
@@ -663,9 +663,9 @@ TEST_UNIFORM_Z (dup_254_s16_z, svint16_t,
/*
** dup_255_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #255
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_255_s16_z, svint16_t,
@@ -683,9 +683,9 @@ TEST_UNIFORM_Z (dup_256_s16_z, svint16_t,
/*
** dup_257_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+)\.b, #1
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1\.h
+** sel z0\.h, p0, \2\.h, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_257_s16_z, svint16_t,
@@ -722,9 +722,9 @@ TEST_UNIFORM_Z (dup_7ffd_s16_z, svint16_t,
/*
** dup_7ffe_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #32766
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_7ffe_s16_z, svint16_t,
@@ -733,9 +733,9 @@ TEST_UNIFORM_Z (dup_7ffe_s16_z, svint16_t,
/*
** dup_7fff_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #32767
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_7fff_s16_z, svint16_t,
@@ -762,9 +762,9 @@ TEST_UNIFORM_Z (dup_m128_s16_z, svint16_t,
/*
** dup_m129_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #-129
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m129_s16_z, svint16_t,
@@ -783,9 +783,9 @@ TEST_UNIFORM_Z (dup_m254_s16_z, svint16_t,
/*
** dup_m255_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #-255
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m255_s16_z, svint16_t,
@@ -803,9 +803,9 @@ TEST_UNIFORM_Z (dup_m256_s16_z, svint16_t,
/*
** dup_m257_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #-257
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m257_s16_z, svint16_t,
@@ -814,9 +814,9 @@ TEST_UNIFORM_Z (dup_m257_s16_z, svint16_t,
/*
** dup_m258_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+)\.b, #-2
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1\.h
+** sel z0\.h, p0, \2\.h, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m258_s16_z, svint16_t,
@@ -848,9 +848,9 @@ TEST_UNIFORM_Z (dup_m7f00_s16_z, svint16_t,
/*
** dup_m7f01_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #-32513
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m7f01_s16_z, svint16_t,
@@ -869,9 +869,9 @@ TEST_UNIFORM_Z (dup_m7ffe_s16_z, svint16_t,
/*
** dup_m7fff_s16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #-32767
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m7fff_s16_z, svint16_t,
@@ -623,9 +623,9 @@ TEST_UNIFORM_Z (dup_127_s32_z, svint32_t,
/*
** dup_128_s32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #128
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_128_s32_z, svint32_t,
@@ -644,9 +644,9 @@ TEST_UNIFORM_Z (dup_253_s32_z, svint32_t,
/*
** dup_254_s32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #254
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_254_s32_z, svint32_t,
@@ -655,9 +655,9 @@ TEST_UNIFORM_Z (dup_254_s32_z, svint32_t,
/*
** dup_255_s32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #255
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_255_s32_z, svint32_t,
@@ -708,9 +708,9 @@ TEST_UNIFORM_Z (dup_7ffd_s32_z, svint32_t,
/*
** dup_7ffe_s32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #32766
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_7ffe_s32_z, svint32_t,
@@ -719,9 +719,9 @@ TEST_UNIFORM_Z (dup_7ffe_s32_z, svint32_t,
/*
** dup_7fff_s32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #32767
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_7fff_s32_z, svint32_t,
@@ -748,9 +748,9 @@ TEST_UNIFORM_Z (dup_m128_s32_z, svint32_t,
/*
** dup_m129_s32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #-129
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_m129_s32_z, svint32_t,
@@ -769,9 +769,9 @@ TEST_UNIFORM_Z (dup_m254_s32_z, svint32_t,
/*
** dup_m255_s32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #-255
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_m255_s32_z, svint32_t,
@@ -789,9 +789,9 @@ TEST_UNIFORM_Z (dup_m256_s32_z, svint32_t,
/*
** dup_m257_s32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #-257
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_m257_s32_z, svint32_t,
@@ -828,9 +828,9 @@ TEST_UNIFORM_Z (dup_m7f00_s32_z, svint32_t,
/*
** dup_m7f01_s32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #-32513
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_m7f01_s32_z, svint32_t,
@@ -849,9 +849,9 @@ TEST_UNIFORM_Z (dup_m7ffe_s32_z, svint32_t,
/*
** dup_m7fff_s32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #-32767
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_m7fff_s32_z, svint32_t,
@@ -623,9 +623,9 @@ TEST_UNIFORM_Z (dup_127_s64_z, svint64_t,
/*
** dup_128_s64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #128
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_128_s64_z, svint64_t,
@@ -644,9 +644,9 @@ TEST_UNIFORM_Z (dup_253_s64_z, svint64_t,
/*
** dup_254_s64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #254
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_254_s64_z, svint64_t,
@@ -655,9 +655,9 @@ TEST_UNIFORM_Z (dup_254_s64_z, svint64_t,
/*
** dup_255_s64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #255
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_255_s64_z, svint64_t,
@@ -708,9 +708,9 @@ TEST_UNIFORM_Z (dup_7ffd_s64_z, svint64_t,
/*
** dup_7ffe_s64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #32766
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_7ffe_s64_z, svint64_t,
@@ -719,9 +719,9 @@ TEST_UNIFORM_Z (dup_7ffe_s64_z, svint64_t,
/*
** dup_7fff_s64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #32767
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_7fff_s64_z, svint64_t,
@@ -748,9 +748,9 @@ TEST_UNIFORM_Z (dup_m128_s64_z, svint64_t,
/*
** dup_m129_s64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #-129
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_m129_s64_z, svint64_t,
@@ -769,9 +769,9 @@ TEST_UNIFORM_Z (dup_m254_s64_z, svint64_t,
/*
** dup_m255_s64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #-255
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_m255_s64_z, svint64_t,
@@ -789,9 +789,9 @@ TEST_UNIFORM_Z (dup_m256_s64_z, svint64_t,
/*
** dup_m257_s64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #-257
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_m257_s64_z, svint64_t,
@@ -828,9 +828,9 @@ TEST_UNIFORM_Z (dup_m7f00_s64_z, svint64_t,
/*
** dup_m7f01_s64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #-32513
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_m7f01_s64_z, svint64_t,
@@ -849,9 +849,9 @@ TEST_UNIFORM_Z (dup_m7ffe_s64_z, svint64_t,
/*
** dup_m7fff_s64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #-32767
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_m7fff_s64_z, svint64_t,
@@ -631,9 +631,9 @@ TEST_UNIFORM_Z (dup_127_u16_z, svuint16_t,
/*
** dup_128_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #128
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_128_u16_z, svuint16_t,
@@ -652,9 +652,9 @@ TEST_UNIFORM_Z (dup_253_u16_z, svuint16_t,
/*
** dup_254_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #254
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_254_u16_z, svuint16_t,
@@ -663,9 +663,9 @@ TEST_UNIFORM_Z (dup_254_u16_z, svuint16_t,
/*
** dup_255_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #255
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_255_u16_z, svuint16_t,
@@ -683,9 +683,9 @@ TEST_UNIFORM_Z (dup_256_u16_z, svuint16_t,
/*
** dup_257_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+)\.b, #1
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1\.h
+** sel z0\.h, p0, \2\.h, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_257_u16_z, svuint16_t,
@@ -722,9 +722,9 @@ TEST_UNIFORM_Z (dup_7ffd_u16_z, svuint16_t,
/*
** dup_7ffe_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #32766
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_7ffe_u16_z, svuint16_t,
@@ -733,9 +733,9 @@ TEST_UNIFORM_Z (dup_7ffe_u16_z, svuint16_t,
/*
** dup_7fff_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #32767
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_7fff_u16_z, svuint16_t,
@@ -762,9 +762,9 @@ TEST_UNIFORM_Z (dup_m128_u16_z, svuint16_t,
/*
** dup_m129_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #-129
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m129_u16_z, svuint16_t,
@@ -783,9 +783,9 @@ TEST_UNIFORM_Z (dup_m254_u16_z, svuint16_t,
/*
** dup_m255_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #-255
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m255_u16_z, svuint16_t,
@@ -803,9 +803,9 @@ TEST_UNIFORM_Z (dup_m256_u16_z, svuint16_t,
/*
** dup_m257_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #-257
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m257_u16_z, svuint16_t,
@@ -814,9 +814,9 @@ TEST_UNIFORM_Z (dup_m257_u16_z, svuint16_t,
/*
** dup_m258_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+)\.b, #-2
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1\.h
+** sel z0\.h, p0, \2\.h, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m258_u16_z, svuint16_t,
@@ -848,9 +848,9 @@ TEST_UNIFORM_Z (dup_m7f00_u16_z, svuint16_t,
/*
** dup_m7f01_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #-32513
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m7f01_u16_z, svuint16_t,
@@ -869,9 +869,9 @@ TEST_UNIFORM_Z (dup_m7ffe_u16_z, svuint16_t,
/*
** dup_m7fff_u16_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.h), #-32767
-** movprfx z0\.h, p0/z, z0\.h
-** mov z0\.h, p0/m, \1
+** sel z0\.h, p0, \2, \1\.h
** ret
*/
TEST_UNIFORM_Z (dup_m7fff_u16_z, svuint16_t,
@@ -623,9 +623,9 @@ TEST_UNIFORM_Z (dup_127_u32_z, svuint32_t,
/*
** dup_128_u32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #128
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_128_u32_z, svuint32_t,
@@ -644,9 +644,9 @@ TEST_UNIFORM_Z (dup_253_u32_z, svuint32_t,
/*
** dup_254_u32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #254
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_254_u32_z, svuint32_t,
@@ -655,9 +655,9 @@ TEST_UNIFORM_Z (dup_254_u32_z, svuint32_t,
/*
** dup_255_u32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #255
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_255_u32_z, svuint32_t,
@@ -708,9 +708,9 @@ TEST_UNIFORM_Z (dup_7ffd_u32_z, svuint32_t,
/*
** dup_7ffe_u32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #32766
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_7ffe_u32_z, svuint32_t,
@@ -719,9 +719,9 @@ TEST_UNIFORM_Z (dup_7ffe_u32_z, svuint32_t,
/*
** dup_7fff_u32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #32767
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_7fff_u32_z, svuint32_t,
@@ -748,9 +748,9 @@ TEST_UNIFORM_Z (dup_m128_u32_z, svuint32_t,
/*
** dup_m129_u32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #-129
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_m129_u32_z, svuint32_t,
@@ -769,9 +769,9 @@ TEST_UNIFORM_Z (dup_m254_u32_z, svuint32_t,
/*
** dup_m255_u32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #-255
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_m255_u32_z, svuint32_t,
@@ -789,9 +789,9 @@ TEST_UNIFORM_Z (dup_m256_u32_z, svuint32_t,
/*
** dup_m257_u32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #-257
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_m257_u32_z, svuint32_t,
@@ -828,9 +828,9 @@ TEST_UNIFORM_Z (dup_m7f00_u32_z, svuint32_t,
/*
** dup_m7f01_u32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #-32513
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_m7f01_u32_z, svuint32_t,
@@ -849,9 +849,9 @@ TEST_UNIFORM_Z (dup_m7ffe_u32_z, svuint32_t,
/*
** dup_m7fff_u32_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.s), #-32767
-** movprfx z0\.s, p0/z, z0\.s
-** mov z0\.s, p0/m, \1
+** sel z0\.s, p0, \2, \1\.s
** ret
*/
TEST_UNIFORM_Z (dup_m7fff_u32_z, svuint32_t,
@@ -623,9 +623,9 @@ TEST_UNIFORM_Z (dup_127_u64_z, svuint64_t,
/*
** dup_128_u64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #128
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_128_u64_z, svuint64_t,
@@ -644,9 +644,9 @@ TEST_UNIFORM_Z (dup_253_u64_z, svuint64_t,
/*
** dup_254_u64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #254
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_254_u64_z, svuint64_t,
@@ -655,9 +655,9 @@ TEST_UNIFORM_Z (dup_254_u64_z, svuint64_t,
/*
** dup_255_u64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #255
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_255_u64_z, svuint64_t,
@@ -708,9 +708,9 @@ TEST_UNIFORM_Z (dup_7ffd_u64_z, svuint64_t,
/*
** dup_7ffe_u64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #32766
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_7ffe_u64_z, svuint64_t,
@@ -719,9 +719,9 @@ TEST_UNIFORM_Z (dup_7ffe_u64_z, svuint64_t,
/*
** dup_7fff_u64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #32767
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_7fff_u64_z, svuint64_t,
@@ -748,9 +748,9 @@ TEST_UNIFORM_Z (dup_m128_u64_z, svuint64_t,
/*
** dup_m129_u64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #-129
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_m129_u64_z, svuint64_t,
@@ -769,9 +769,9 @@ TEST_UNIFORM_Z (dup_m254_u64_z, svuint64_t,
/*
** dup_m255_u64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #-255
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_m255_u64_z, svuint64_t,
@@ -789,9 +789,9 @@ TEST_UNIFORM_Z (dup_m256_u64_z, svuint64_t,
/*
** dup_m257_u64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #-257
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_m257_u64_z, svuint64_t,
@@ -828,9 +828,9 @@ TEST_UNIFORM_Z (dup_m7f00_u64_z, svuint64_t,
/*
** dup_m7f01_u64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #-32513
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_m7f01_u64_z, svuint64_t,
@@ -849,9 +849,9 @@ TEST_UNIFORM_Z (dup_m7ffe_u64_z, svuint64_t,
/*
** dup_m7fff_u64_z:
+** mov (z[0-9]+)\.b, #0
** mov (z[0-9]+\.d), #-32767
-** movprfx z0\.d, p0/z, z0\.d
-** mov z0\.d, p0/m, \1
+** sel z0\.d, p0, \2, \1\.d
** ret
*/
TEST_UNIFORM_Z (dup_m7fff_u64_z, svuint64_t,