@@ -22895,19 +22895,19 @@ aarch64_advsimd_valid_immediate_hs (unsigned int val32,
return false;
}
-/* Return true if replicating VAL64 is a valid immediate for the
+/* Return true if replicating VAL64 with mode MODE is a valid immediate for the
Advanced SIMD operation described by WHICH. If INFO is nonnull,
use it to describe valid immediates. */
static bool
aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
+ scalar_int_mode mode,
simd_immediate_info *info,
enum simd_immediate_check which)
{
unsigned int val32 = val64 & 0xffffffff;
- unsigned int val16 = val64 & 0xffff;
unsigned int val8 = val64 & 0xff;
- if (val32 == (val64 >> 32))
+ if (mode != DImode)
{
if ((which & AARCH64_CHECK_ORR) != 0
&& aarch64_advsimd_valid_immediate_hs (val32, info, which,
@@ -22919,10 +22919,9 @@ aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
simd_immediate_info::MVN))
return true;
+
/* Try using a replicated byte. */
- if (which == AARCH64_CHECK_MOV
- && val16 == (val32 >> 16)
- && val8 == (val16 >> 8))
+ if (which == AARCH64_CHECK_MOV && mode == QImode)
{
if (info)
*info = simd_immediate_info (QImode, val8);
@@ -22950,28 +22949,14 @@ aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
return false;
}
-/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
+/* Return true if replicating IVAL with MODE gives a valid immediate for an SVE MOV
instruction. If INFO is nonnull, use it to describe valid immediates. */
static bool
-aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
+aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT ival, scalar_int_mode mode,
simd_immediate_info *info)
{
- scalar_int_mode mode = DImode;
- unsigned int val32 = val64 & 0xffffffff;
- if (val32 == (val64 >> 32))
- {
- mode = SImode;
- unsigned int val16 = val32 & 0xffff;
- if (val16 == (val32 >> 16))
- {
- mode = HImode;
- unsigned int val8 = val16 & 0xff;
- if (val8 == (val16 >> 8))
- mode = QImode;
- }
- }
- HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
+ HOST_WIDE_INT val = trunc_int_for_mode (ival, mode);
if (IN_RANGE (val, -0x80, 0x7f))
{
/* DUP with no shift. */
@@ -22986,7 +22971,7 @@ aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
*info = simd_immediate_info (mode, val);
return true;
}
- if (aarch64_bitmask_imm (val64, mode))
+ if (aarch64_bitmask_imm (ival, mode))
{
/* DUPM. */
if (info)
@@ -23067,6 +23052,91 @@ aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info)
return false;
}
+/* We can only represent floating point constants which will fit in
+ "quarter-precision" values. These values are characterised by
+ a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
+ by:
+
+ (-1)^s * (n/16) * 2^r
+
+ Where:
+ 's' is the sign bit.
+ 'n' is an integer in the range 16 <= n <= 31.
+ 'r' is an integer in the range -3 <= r <= 4.
+
+ Return true iff R represents a value encodable into an AArch64 floating point
+ move instruction as an immediate.  Otherwise false. */
+
+static bool
+aarch64_real_float_const_representable_p (REAL_VALUE_TYPE r)
+{
+ /* This represents our current view of how many bits
+ make up the mantissa. */
+ int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
+ int exponent;
+ unsigned HOST_WIDE_INT mantissa, mask;
+ REAL_VALUE_TYPE m;
+ bool fail = false;
+
+ /* We cannot represent infinities, NaNs or +/-zero. We won't
+ know if we have +zero until we analyse the mantissa, but we
+ can reject the other invalid values. */
+ if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
+ || REAL_VALUE_MINUS_ZERO (r))
+ return false;
+
+ /* Extract exponent. */
+ r = real_value_abs (&r);
+ exponent = REAL_EXP (&r);
+
+ /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
+ highest (sign) bit, with a fixed binary point at bit point_pos.
+ m1 holds the low part of the mantissa, m2 the high part.
+ WARNING: If we ever have a representation using more than 2 * H_W_I - 1
+ bits for the mantissa, this can fail (low bits will be lost). */
+ real_ldexp (&m, &r, point_pos - exponent);
+ wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
+
+ /* If the low part of the mantissa has bits set we cannot represent
+ the value. */
+ if (fail || w.ulow () != 0)
+ return false;
+
+ /* We have rejected the lower HOST_WIDE_INT, so update our
+ understanding of how many bits lie in the mantissa and
+ look only at the high HOST_WIDE_INT. */
+ mantissa = w.elt (1);
+ point_pos -= HOST_BITS_PER_WIDE_INT;
+
+ /* We can only represent values with a mantissa of the form 1.xxxx. */
+ mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
+ if ((mantissa & mask) != 0)
+ return false;
+
+ /* Having filtered unrepresentable values, we may now remove all
+ but the highest 5 bits. */
+ mantissa >>= point_pos - 5;
+
+ /* We cannot represent the value 0.0, so reject it. This is handled
+ elsewhere. */
+ if (mantissa == 0)
+ return false;
+
+ /* Then, as bit 4 is always set, we can mask it off, leaving
+ the mantissa in the range [0, 15]. */
+ mantissa &= ~(1 << 4);
+ gcc_assert (mantissa <= 15);
+
+ /* GCC internally does not use IEEE754-like encoding (where normalized
+ significands are in the range [1, 2). GCC uses [0.5, 1) (see real.cc).
+ Our mantissa values are shifted 4 places to the left relative to
+ normalized IEEE754 so we must modify the exponent returned by REAL_EXP
+ by 5 places to correct for GCC's representation. */
+ exponent = 5 - exponent;
+
+ return (exponent >= 0 && exponent <= 7);
+}
+
/* Return true if OP is a valid SIMD immediate for the operation
described by WHICH. If INFO is nonnull, use it to describe valid
immediates. */
@@ -23120,20 +23190,6 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
else
return false;
- scalar_float_mode elt_float_mode;
- if (n_elts == 1
- && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
- {
- rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
- if (aarch64_float_const_zero_rtx_p (elt)
- || aarch64_float_const_representable_p (elt))
- {
- if (info)
- *info = simd_immediate_info (elt_float_mode, elt);
- return true;
- }
- }
-
/* If all elements in an SVE vector have the same value, we have a free
choice between using the element mode and using the container mode.
Using the element mode means that unused parts of the vector are
@@ -23195,10 +23251,57 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
<< (i * BITS_PER_UNIT));
+ /* Try encoding the integer immediate as a floating point value if
+ it's an exact value.  */
+ scalar_float_mode fmode = DFmode;
+ scalar_int_mode imode = DImode;
+ unsigned HOST_WIDE_INT ival = val64;
+ unsigned int val32 = val64 & 0xffffffff;
+ if (val32 == (val64 >> 32))
+ {
+ fmode = SFmode;
+ imode = SImode;
+ ival = val32;
+ unsigned int val16 = val32 & 0xffff;
+ if (val16 == (val32 >> 16))
+ {
+ fmode = HFmode;
+ imode = HImode;
+ ival = val16;
+ unsigned int val8 = val16 & 0xff;
+ if (val8 == (val16 >> 8))
+ {
+ imode = QImode;
+ ival = val8;
+ }
+ }
+ }
+
+ if (which == AARCH64_CHECK_MOV
+ && imode != QImode
+ && (imode != HImode || TARGET_FP_F16INST))
+ {
+ long int as_long_ints[2];
+ as_long_ints[0] = ival & 0xFFFFFFFF;
+ as_long_ints[1] = (ival >> 32) & 0xFFFFFFFF;
+
+ REAL_VALUE_TYPE r;
+ real_from_target (&r, as_long_ints, fmode);
+ if (aarch64_real_float_const_representable_p (r))
+ {
+ if (info)
+ {
+ rtx float_val = const_double_from_real_value (r, fmode);
+ *info = simd_immediate_info (fmode, float_val);
+ }
+ return true;
+ }
+ }
+
if (vec_flags & VEC_SVE_DATA)
- return aarch64_sve_valid_immediate (val64, info);
+ return aarch64_sve_valid_immediate (ival, imode, info);
else
- return aarch64_advsimd_valid_immediate (val64, info, which);
+ return aarch64_advsimd_valid_immediate (val64, imode, info, which);
}
/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
@@ -25201,106 +25304,32 @@ aarch64_c_mode_for_suffix (char suffix)
return VOIDmode;
}
-/* We can only represent floating point constants which will fit in
- "quarter-precision" values. These values are characterised by
- a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
- by:
-
- (-1)^s * (n/16) * 2^r
-
- Where:
- 's' is the sign bit.
- 'n' is an integer in the range 16 <= n <= 31.
- 'r' is an integer in the range -3 <= r <= 4. */
-
-/* Return true iff X can be represented by a quarter-precision
+/* Return true iff X (whose mode is taken from X itself) is a quarter-precision
floating point immediate operand X. Note, we cannot represent 0.0. */
+
bool
aarch64_float_const_representable_p (rtx x)
{
- /* This represents our current view of how many bits
- make up the mantissa. */
- int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
- int exponent;
- unsigned HOST_WIDE_INT mantissa, mask;
- REAL_VALUE_TYPE r, m;
- bool fail;
-
x = unwrap_const_vec_duplicate (x);
+ machine_mode mode = GET_MODE (x);
if (!CONST_DOUBLE_P (x))
return false;
- if (GET_MODE (x) == VOIDmode
- || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
+ if (mode == HFmode && !TARGET_FP_F16INST)
return false;
- r = *CONST_DOUBLE_REAL_VALUE (x);
-
- /* We cannot represent infinities, NaNs or +/-zero. We won't
- know if we have +zero until we analyse the mantissa, but we
- can reject the other invalid values. */
- if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
- || REAL_VALUE_MINUS_ZERO (r))
- return false;
+ REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (x);
/* For BFmode, only handle 0.0. */
- if (GET_MODE (x) == BFmode)
+ if (mode == BFmode)
return real_iszero (&r, false);
- /* Extract exponent. */
- r = real_value_abs (&r);
- exponent = REAL_EXP (&r);
-
- /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
- highest (sign) bit, with a fixed binary point at bit point_pos.
- m1 holds the low part of the mantissa, m2 the high part.
- WARNING: If we ever have a representation using more than 2 * H_W_I - 1
- bits for the mantissa, this can fail (low bits will be lost). */
- real_ldexp (&m, &r, point_pos - exponent);
- wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
-
- /* If the low part of the mantissa has bits set we cannot represent
- the value. */
- if (w.ulow () != 0)
- return false;
- /* We have rejected the lower HOST_WIDE_INT, so update our
- understanding of how many bits lie in the mantissa and
- look only at the high HOST_WIDE_INT. */
- mantissa = w.elt (1);
- point_pos -= HOST_BITS_PER_WIDE_INT;
-
- /* We can only represent values with a mantissa of the form 1.xxxx. */
- mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
- if ((mantissa & mask) != 0)
- return false;
-
- /* Having filtered unrepresentable values, we may now remove all
- but the highest 5 bits. */
- mantissa >>= point_pos - 5;
-
- /* We cannot represent the value 0.0, so reject it. This is handled
- elsewhere. */
- if (mantissa == 0)
- return false;
-
- /* Then, as bit 4 is always set, we can mask it off, leaving
- the mantissa in the range [0, 15]. */
- mantissa &= ~(1 << 4);
- gcc_assert (mantissa <= 15);
-
- /* GCC internally does not use IEEE754-like encoding (where normalized
- significands are in the range [1, 2). GCC uses [0.5, 1) (see real.cc).
- Our mantissa values are shifted 4 places to the left relative to
- normalized IEEE754 so we must modify the exponent returned by REAL_EXP
- by 5 places to correct for GCC's representation. */
- exponent = 5 - exponent;
-
- return (exponent >= 0 && exponent <= 7);
+ return aarch64_real_float_const_representable_p (r);
}
-/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
- immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
- output MOVI/MVNI, ORR or BIC immediate. */
+/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR, BIC or
+ FMOV immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether
+ to output MOVI/MVNI, ORR or BIC immediate. */
char*
aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
enum simd_immediate_check which)
new file mode 100644
@@ -0,0 +1,87 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv9-a -Ofast" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** g:
+** fmov v0\.4s, 1\.0e\+0
+** ret
+*/
+float32x4_t g(){
+ return vdupq_n_f32(1);
+}
+
+/*
+** h:
+** fmov v0\.4s, 1\.0e\+0
+** ret
+*/
+uint32x4_t h() {
+ return vreinterpretq_u32_f32(g());
+}
+
+/*
+** f1:
+** fmov v0\.4s, 1\.0e\+0
+** ret
+*/
+uint32x4_t f1() {
+ return vdupq_n_u32(0x3f800000);
+}
+
+/*
+** f2:
+** fmov v0\.4s, 1\.5e\+0
+** ret
+*/
+uint32x4_t f2() {
+ return vdupq_n_u32(0x3FC00000);
+}
+
+/*
+** f3:
+** fmov v0\.4s, 1\.25e\+0
+** ret
+*/
+uint32x4_t f3() {
+ return vdupq_n_u32(0x3FA00000);
+}
+
+/*
+** f4:
+** fmov v0\.2d, 1\.0e\+0
+** ret
+*/
+uint64x2_t f4() {
+ return vdupq_n_u64(0x3FF0000000000000);
+}
+
+/*
+** fn4:
+** fmov v0\.2d, -1\.0e\+0
+** ret
+*/
+uint64x2_t fn4() {
+ return vdupq_n_u64(0xBFF0000000000000);
+}
+
+/*
+** f5:
+** fmov v0\.8h, 1\.5e\+0
+** ret
+*/
+uint16x8_t f5() {
+ return vdupq_n_u16(0x3E00);
+}
+
+/*
+** f6:
+** adrp x0, \.LC0
+** ldr q0, \[x0, #:lo12:\.LC0\]
+** ret
+*/
+uint32x4_t f6() {
+ return vdupq_n_u32(0x4f800000);
+}