[2/3] AArch64: support encoding integer immediates using floating point moves

Message ID Zwz4YsLNzknp/BdD@arm.com
State New
Series [1/3] AArch64: update testsuite to account for new zero moves

Commit Message

Tamar Christina Oct. 14, 2024, 10:54 a.m. UTC
Hi All,

This patch extends our SIMD immediate generation to support generating integer
immediates using a floating point move when the integer immediate maps to an
exact FP value.

As an example:

uint32x4_t f1() {
    return vdupq_n_u32(0x3f800000);
}

currently generates:

f1:
        adrp    x0, .LC0
        ldr     q0, [x0, #:lo12:.LC0]
        ret

i.e. a load, but with this change:

f1:
        fmov    v0.4s, 1.0e+0
        ret
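
This is valid because 0x3f800000 is exactly the IEEE-754 single-precision bit
pattern of 1.0f, and 1.0 fits the FP immediate's quarter-precision form
(-1)^s * (n/16) * 2^r with s = 0, n = 16 and r = 0.  A quick host-side check
of that bit identity (an illustrative sketch, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <string.h>

int
main (void)
{
  /* The integer immediate and 1.0f share the same bit pattern.  */
  uint32_t ival = 0x3f800000;
  float fval;
  memcpy (&fval, &ival, sizeof fval);
  assert (fval == 1.0f);

  /* Quarter-precision form: (-1)^0 * (16/16) * 2^0 == 1.0.  */
  assert (fval == (16.0f / 16.0f) * 1.0f);
  return 0;
}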

Such immediates are common in e.g. our math routines in glibc, where they are
used as masks to extract or test parts of an FP value.
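
For instance (an illustrative sketch of the idiom, not code taken from glibc),
a vector routine that tests |x| < 1.0 by comparing bit patterns needs exactly
this constant:

#include <arm_neon.h>

/* Lane mask for |x| < 1.0, computed on the bit patterns.  For finite x,
   positive float bit patterns order the same way as unsigned integers, and
   0x3f800000 is the bit pattern of 1.0f, so with this patch the constant
   below is materialized with an fmov rather than a literal-pool load.  */
uint32x4_t
mag_lt_one (float32x4_t x)
{
  uint32x4_t ix = vreinterpretq_u32_f32 (vabsq_f32 (x));
  return vcltq_u32 (ix, vdupq_n_u32 (0x3f800000));
}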

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64.cc (aarch64_sve_valid_immediate,
	aarch64_simd_valid_immediate): Refactor to accept modes and values.
	(aarch64_float_const_representable_p): Refactor and extract FP checks
	into ...
	(aarch64_real_float_const_representable_p): ...This.
	(aarch64_advsimd_valid_immediate): Use it.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/const_create_using_fmov.c: New test.

---

Comments

Richard Sandiford Oct. 14, 2024, 6:27 p.m. UTC | #1
Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> This patch extends our SIMD immediate generation to support generating integer
> immediates using a floating point move when the integer immediate maps to an
> exact FP value.
>
> As an example:
>
> uint32x4_t f1() {
>     return vdupq_n_u32(0x3f800000);
> }
>
> currently generates:
>
> f1:
>         adrp    x0, .LC0
>         ldr     q0, [x0, #:lo12:.LC0]
>         ret
>
> i.e. a load, but with this change:
>
> f1:
>         fmov    v0.4s, 1.0e+0
>         ret
>
> Such immediates are common in e.g. our math routines in glibc, where they are
> used as masks to extract or test parts of an FP value.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64.cc (aarch64_sve_valid_immediate,
> 	aarch64_simd_valid_immediate): Refactor to accept modes and values.
> 	(aarch64_float_const_representable_p): Refactor and extract FP checks
> 	into ...
> 	(aarch64_real_float_const_representable_p): ...This.
> 	(aarch64_advsimd_valid_immediate): Use it.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/const_create_using_fmov.c: New test.

Looks good.  I think it's worth mentioning in the changelog that you
also fix the previous behaviour of ignoring the "fail" pass-back from
real_to_integer.

Some formatting trivia below, and a question:

> @@ -22919,10 +22919,9 @@ aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
>  						 simd_immediate_info::MVN))
>  	return true;
>  
> +

Excess blank line.

>        /* Try using a replicated byte.  */
> -      if (which == AARCH64_CHECK_MOV
> -	  && val16 == (val32 >> 16)
> -	  && val8 == (val16 >> 8))
> +      if (which == AARCH64_CHECK_MOV && mode == QImode)
>  	{
>  	  if (info)
>  	    *info = simd_immediate_info (QImode, val8);
> @@ -22950,28 +22949,14 @@ aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
>    return false;
>  }
>  
> -/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
> +/* Return true if replicating IVAL with MODE gives a valid immediate for an SVE MOV

Long line.

>     instruction.  If INFO is nonnull, use it to describe valid immediates.  */
>  
>  static bool
> -aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
> +aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT ival, scalar_int_mode mode,
>  			     simd_immediate_info *info)
>  {
> -  scalar_int_mode mode = DImode;
> -  unsigned int val32 = val64 & 0xffffffff;
> -  if (val32 == (val64 >> 32))
> -    {
> -      mode = SImode;
> -      unsigned int val16 = val32 & 0xffff;
> -      if (val16 == (val32 >> 16))
> -	{
> -	  mode = HImode;
> -	  unsigned int val8 = val16 & 0xff;
> -	  if (val8 == (val16 >> 8))
> -	    mode = QImode;
> -	}
> -    }
> -  HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
> +  HOST_WIDE_INT val = trunc_int_for_mode (ival, mode);
>    if (IN_RANGE (val, -0x80, 0x7f))
>      {
>        /* DUP with no shift.  */
> @@ -22986,7 +22971,7 @@ aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
>  	*info = simd_immediate_info (mode, val);
>        return true;
>      }
> -  if (aarch64_bitmask_imm (val64, mode))
> +  if (aarch64_bitmask_imm (ival, mode))
>      {
>        /* DUPM.  */
>        if (info)
> @@ -23067,6 +23052,91 @@ aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info)
>    return false;
>  }
>  
> +/* We can only represent floating point constants which will fit in
> +   "quarter-precision" values.  These values are characterised by
> +   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
> +   by:
> +
> +   (-1)^s * (n/16) * 2^r
> +
> +   Where:
> +     's' is the sign bit.
> +     'n' is an integer in the range 16 <= n <= 31.
> +     'r' is an integer in the range -3 <= r <= 4.
> +
> +   Return true iff R represents a value encodable into an AArch64 floating point
> +   move instruction as an immediate.  Otherwise false.  */
> +
> +static bool
> +aarch64_real_float_const_representable_p (REAL_VALUE_TYPE r)
> +{
> +  /* This represents our current view of how many bits
> +     make up the mantissa.  */
> +  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
> +  int exponent;
> +  unsigned HOST_WIDE_INT mantissa, mask;
> +  REAL_VALUE_TYPE m;
> +  bool fail = false;
> +
> + /* We cannot represent infinities, NaNs or +/-zero.  We won't

Should be indented by two spaces.

> +     know if we have +zero until we analyse the mantissa, but we
> +     can reject the other invalid values.  */
> +  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
> +      || REAL_VALUE_MINUS_ZERO (r))
> +    return false;
> [...]
> @@ -23195,10 +23251,57 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
>      val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
>  	      << (i * BITS_PER_UNIT));
>  
> +  /* Try encoding the integer immediate as a floating point value if it's an exact

Long line.

> +     value.  */
> +  scalar_float_mode fmode = DFmode;
> +  scalar_int_mode imode = DImode;
> +  unsigned HOST_WIDE_INT ival = val64;
> +  unsigned int val32 = val64 & 0xffffffff;
> +  if (val32 == (val64 >> 32))
> +    {
> +      fmode = SFmode;
> +      imode = SImode;
> +      ival = val32;
> +      unsigned int val16 = val32 & 0xffff;
> +      if (val16 == (val32 >> 16))
> +	{
> +	  fmode = HFmode;
> +	  imode = HImode;
> +	  ival = val16;
> +	  unsigned int val8 = val16 & 0xff;
> +	  if (val8 == (val16 >> 8))
> +	    {
> +	      imode = QImode;
> +	      ival = val8;
> +	    }
> +	}
> +    }
> +
> +  if (which == AARCH64_CHECK_MOV
> +      && imode != QImode
> +      && (imode != HImode || TARGET_FP_F16INST))
> +    {
> +      long int as_long_ints[2];
> +      as_long_ints[0] = ival & 0xFFFFFFFF;
> +      as_long_ints[1] = (ival >> 32) & 0xFFFFFFFF;
> +
> +      REAL_VALUE_TYPE r;
> +      real_from_target (&r, as_long_ints, fmode);
> +      if (aarch64_real_float_const_representable_p (r))
> +	{
> +	  if (info)
> +	    {
> +	      rtx float_val = const_double_from_real_value (r, fmode);
> +	      *info = simd_immediate_info (fmode, float_val);
> +	    }
> +	  return true;
> +	}
> +    }
> +
>    if (vec_flags & VEC_SVE_DATA)
> -    return aarch64_sve_valid_immediate (val64, info);
> +    return aarch64_sve_valid_immediate (ival, imode, info);
>    else
> -    return aarch64_advsimd_valid_immediate (val64, info, which);
> +    return aarch64_advsimd_valid_immediate (val64, imode, info, which);
>  }
>  
>  /* Check whether X is a VEC_SERIES-like constant that starts at 0 and
> @@ -25201,106 +25304,32 @@ aarch64_c_mode_for_suffix (char suffix)
>    return VOIDmode;
>  }
>  
> -/* We can only represent floating point constants which will fit in
> -   "quarter-precision" values.  These values are characterised by
> -   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
> -   by:
> -
> -   (-1)^s * (n/16) * 2^r
> -
> -   Where:
> -     's' is the sign bit.
> -     'n' is an integer in the range 16 <= n <= 31.
> -     'r' is an integer in the range -3 <= r <= 4.  */
> -
> -/* Return true iff X can be represented by a quarter-precision
> +/* Return true iff X with mode MODE can be represented by a quarter-precision
>     floating point immediate operand X.  Note, we cannot represent 0.0.  */

OK with the changes above.

This is pre-existing, but I suppose the comment is out of date, since...

> +
>  bool
>  aarch64_float_const_representable_p (rtx x)
>  {
> -  /* This represents our current view of how many bits
> -     make up the mantissa.  */
> -  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
> -  int exponent;
> -  unsigned HOST_WIDE_INT mantissa, mask;
> -  REAL_VALUE_TYPE r, m;
> -  bool fail;
> -
>    x = unwrap_const_vec_duplicate (x);
> +  machine_mode mode = GET_MODE (x);
>    if (!CONST_DOUBLE_P (x))
>      return false;
>  
> -  if (GET_MODE (x) == VOIDmode
> -      || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
> +  if (mode == HFmode && !TARGET_FP_F16INST)
>      return false;
>  
> -  r = *CONST_DOUBLE_REAL_VALUE (x);
> -
> -  /* We cannot represent infinities, NaNs or +/-zero.  We won't
> -     know if we have +zero until we analyse the mantissa, but we
> -     can reject the other invalid values.  */
> -  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
> -      || REAL_VALUE_MINUS_ZERO (r))
> -    return false;
> +    REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (x);
>  
>    /* For BFmode, only handle 0.0. */
> -  if (GET_MODE (x) == BFmode)
> +  if (mode == BFmode)
>      return real_iszero (&r, false);

...we do now handle zero for BFmode.  But that's probably a bug.
With this patch, it should be "even more true" that this routine
isn't supposed to handle zero, although it was also true before.

Does everything work with the final line above as "return false" instead?
If so, that's ok as part of this patch, as a separate patch, or not at
all if you'd rather leave it.

Thanks,
Richard

Patch

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 102680a0efca1ce928e6945033c01cfb68a65152..9142a1eb319c9d596eaa8ab723c5abea60818438 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22895,19 +22895,19 @@  aarch64_advsimd_valid_immediate_hs (unsigned int val32,
   return false;
 }
 
-/* Return true if replicating VAL64 is a valid immediate for the
+/* Return true if replicating VAL64 with mode MODE is a valid immediate for the
    Advanced SIMD operation described by WHICH.  If INFO is nonnull,
    use it to describe valid immediates.  */
 static bool
 aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
+				 scalar_int_mode mode,
 				 simd_immediate_info *info,
 				 enum simd_immediate_check which)
 {
   unsigned int val32 = val64 & 0xffffffff;
-  unsigned int val16 = val64 & 0xffff;
   unsigned int val8 = val64 & 0xff;
 
-  if (val32 == (val64 >> 32))
+  if (mode != DImode)
     {
       if ((which & AARCH64_CHECK_ORR) != 0
 	  && aarch64_advsimd_valid_immediate_hs (val32, info, which,
@@ -22919,10 +22919,9 @@  aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
 						 simd_immediate_info::MVN))
 	return true;
 
+
       /* Try using a replicated byte.  */
-      if (which == AARCH64_CHECK_MOV
-	  && val16 == (val32 >> 16)
-	  && val8 == (val16 >> 8))
+      if (which == AARCH64_CHECK_MOV && mode == QImode)
 	{
 	  if (info)
 	    *info = simd_immediate_info (QImode, val8);
@@ -22950,28 +22949,14 @@  aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
   return false;
 }
 
-/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
+/* Return true if replicating IVAL with MODE gives a valid immediate for an SVE MOV
    instruction.  If INFO is nonnull, use it to describe valid immediates.  */
 
 static bool
-aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
+aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT ival, scalar_int_mode mode,
 			     simd_immediate_info *info)
 {
-  scalar_int_mode mode = DImode;
-  unsigned int val32 = val64 & 0xffffffff;
-  if (val32 == (val64 >> 32))
-    {
-      mode = SImode;
-      unsigned int val16 = val32 & 0xffff;
-      if (val16 == (val32 >> 16))
-	{
-	  mode = HImode;
-	  unsigned int val8 = val16 & 0xff;
-	  if (val8 == (val16 >> 8))
-	    mode = QImode;
-	}
-    }
-  HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
+  HOST_WIDE_INT val = trunc_int_for_mode (ival, mode);
   if (IN_RANGE (val, -0x80, 0x7f))
     {
       /* DUP with no shift.  */
@@ -22986,7 +22971,7 @@  aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
 	*info = simd_immediate_info (mode, val);
       return true;
     }
-  if (aarch64_bitmask_imm (val64, mode))
+  if (aarch64_bitmask_imm (ival, mode))
     {
       /* DUPM.  */
       if (info)
@@ -23067,6 +23052,91 @@  aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info)
   return false;
 }
 
+/* We can only represent floating point constants which will fit in
+   "quarter-precision" values.  These values are characterised by
+   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
+   by:
+
+   (-1)^s * (n/16) * 2^r
+
+   Where:
+     's' is the sign bit.
+     'n' is an integer in the range 16 <= n <= 31.
+     'r' is an integer in the range -3 <= r <= 4.
+
+   Return true iff R represents a value encodable into an AArch64 floating point
+   move instruction as an immediate.  Otherwise false.  */
+
+static bool
+aarch64_real_float_const_representable_p (REAL_VALUE_TYPE r)
+{
+  /* This represents our current view of how many bits
+     make up the mantissa.  */
+  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
+  int exponent;
+  unsigned HOST_WIDE_INT mantissa, mask;
+  REAL_VALUE_TYPE m;
+  bool fail = false;
+
+ /* We cannot represent infinities, NaNs or +/-zero.  We won't
+     know if we have +zero until we analyse the mantissa, but we
+     can reject the other invalid values.  */
+  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
+      || REAL_VALUE_MINUS_ZERO (r))
+    return false;
+
+  /* Extract exponent.  */
+  r = real_value_abs (&r);
+  exponent = REAL_EXP (&r);
+
+  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
+     highest (sign) bit, with a fixed binary point at bit point_pos.
+     m1 holds the low part of the mantissa, m2 the high part.
+     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
+     bits for the mantissa, this can fail (low bits will be lost).  */
+  real_ldexp (&m, &r, point_pos - exponent);
+  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
+
+  /* If the low part of the mantissa has bits set we cannot represent
+     the value.  */
+  if (fail || w.ulow () != 0)
+    return false;
+
+  /* We have rejected the lower HOST_WIDE_INT, so update our
+     understanding of how many bits lie in the mantissa and
+     look only at the high HOST_WIDE_INT.  */
+  mantissa = w.elt (1);
+  point_pos -= HOST_BITS_PER_WIDE_INT;
+
+  /* We can only represent values with a mantissa of the form 1.xxxx.  */
+  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
+  if ((mantissa & mask) != 0)
+    return false;
+
+  /* Having filtered unrepresentable values, we may now remove all
+     but the highest 5 bits.  */
+  mantissa >>= point_pos - 5;
+
+  /* We cannot represent the value 0.0, so reject it.  This is handled
+     elsewhere.  */
+  if (mantissa == 0)
+    return false;
+
+  /* Then, as bit 4 is always set, we can mask it off, leaving
+     the mantissa in the range [0, 15].  */
+  mantissa &= ~(1 << 4);
+  gcc_assert (mantissa <= 15);
+
+  /* GCC internally does not use IEEE754-like encoding (where normalized
+     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.cc).
+     Our mantissa values are shifted 4 places to the left relative to
+     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
+     by 5 places to correct for GCC's representation.  */
+  exponent = 5 - exponent;
+
+  return (exponent >= 0 && exponent <= 7);
+}
+
 /* Return true if OP is a valid SIMD immediate for the operation
    described by WHICH.  If INFO is nonnull, use it to describe valid
    immediates.  */
@@ -23120,20 +23190,6 @@  aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
   else
     return false;
 
-  scalar_float_mode elt_float_mode;
-  if (n_elts == 1
-      && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
-    {
-      rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
-      if (aarch64_float_const_zero_rtx_p (elt)
-	  || aarch64_float_const_representable_p (elt))
-	{
-	  if (info)
-	    *info = simd_immediate_info (elt_float_mode, elt);
-	  return true;
-	}
-    }
-
   /* If all elements in an SVE vector have the same value, we have a free
      choice between using the element mode and using the container mode.
      Using the element mode means that unused parts of the vector are
@@ -23195,10 +23251,57 @@  aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
     val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
 	      << (i * BITS_PER_UNIT));
 
+  /* Try encoding the integer immediate as a floating point value if it's an exact
+     value.  */
+  scalar_float_mode fmode = DFmode;
+  scalar_int_mode imode = DImode;
+  unsigned HOST_WIDE_INT ival = val64;
+  unsigned int val32 = val64 & 0xffffffff;
+  if (val32 == (val64 >> 32))
+    {
+      fmode = SFmode;
+      imode = SImode;
+      ival = val32;
+      unsigned int val16 = val32 & 0xffff;
+      if (val16 == (val32 >> 16))
+	{
+	  fmode = HFmode;
+	  imode = HImode;
+	  ival = val16;
+	  unsigned int val8 = val16 & 0xff;
+	  if (val8 == (val16 >> 8))
+	    {
+	      imode = QImode;
+	      ival = val8;
+	    }
+	}
+    }
+
+  if (which == AARCH64_CHECK_MOV
+      && imode != QImode
+      && (imode != HImode || TARGET_FP_F16INST))
+    {
+      long int as_long_ints[2];
+      as_long_ints[0] = ival & 0xFFFFFFFF;
+      as_long_ints[1] = (ival >> 32) & 0xFFFFFFFF;
+
+      REAL_VALUE_TYPE r;
+      real_from_target (&r, as_long_ints, fmode);
+      if (aarch64_real_float_const_representable_p (r))
+	{
+	  if (info)
+	    {
+	      rtx float_val = const_double_from_real_value (r, fmode);
+	      *info = simd_immediate_info (fmode, float_val);
+	    }
+	  return true;
+	}
+    }
+
   if (vec_flags & VEC_SVE_DATA)
-    return aarch64_sve_valid_immediate (val64, info);
+    return aarch64_sve_valid_immediate (ival, imode, info);
   else
-    return aarch64_advsimd_valid_immediate (val64, info, which);
+    return aarch64_advsimd_valid_immediate (val64, imode, info, which);
 }
 
 /* Check whether X is a VEC_SERIES-like constant that starts at 0 and
@@ -25201,106 +25304,32 @@  aarch64_c_mode_for_suffix (char suffix)
   return VOIDmode;
 }
 
-/* We can only represent floating point constants which will fit in
-   "quarter-precision" values.  These values are characterised by
-   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
-   by:
-
-   (-1)^s * (n/16) * 2^r
-
-   Where:
-     's' is the sign bit.
-     'n' is an integer in the range 16 <= n <= 31.
-     'r' is an integer in the range -3 <= r <= 4.  */
-
-/* Return true iff X can be represented by a quarter-precision
+/* Return true iff X with mode MODE can be represented by a quarter-precision
    floating point immediate operand X.  Note, we cannot represent 0.0.  */
+
 bool
 aarch64_float_const_representable_p (rtx x)
 {
-  /* This represents our current view of how many bits
-     make up the mantissa.  */
-  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
-  int exponent;
-  unsigned HOST_WIDE_INT mantissa, mask;
-  REAL_VALUE_TYPE r, m;
-  bool fail;
-
   x = unwrap_const_vec_duplicate (x);
+  machine_mode mode = GET_MODE (x);
   if (!CONST_DOUBLE_P (x))
     return false;
 
-  if (GET_MODE (x) == VOIDmode
-      || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
+  if (mode == HFmode && !TARGET_FP_F16INST)
     return false;
 
-  r = *CONST_DOUBLE_REAL_VALUE (x);
-
-  /* We cannot represent infinities, NaNs or +/-zero.  We won't
-     know if we have +zero until we analyse the mantissa, but we
-     can reject the other invalid values.  */
-  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
-      || REAL_VALUE_MINUS_ZERO (r))
-    return false;
+    REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (x);
 
   /* For BFmode, only handle 0.0. */
-  if (GET_MODE (x) == BFmode)
+  if (mode == BFmode)
     return real_iszero (&r, false);
 
-  /* Extract exponent.  */
-  r = real_value_abs (&r);
-  exponent = REAL_EXP (&r);
-
-  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
-     highest (sign) bit, with a fixed binary point at bit point_pos.
-     m1 holds the low part of the mantissa, m2 the high part.
-     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
-     bits for the mantissa, this can fail (low bits will be lost).  */
-  real_ldexp (&m, &r, point_pos - exponent);
-  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
-
-  /* If the low part of the mantissa has bits set we cannot represent
-     the value.  */
-  if (w.ulow () != 0)
-    return false;
-  /* We have rejected the lower HOST_WIDE_INT, so update our
-     understanding of how many bits lie in the mantissa and
-     look only at the high HOST_WIDE_INT.  */
-  mantissa = w.elt (1);
-  point_pos -= HOST_BITS_PER_WIDE_INT;
-
-  /* We can only represent values with a mantissa of the form 1.xxxx.  */
-  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
-  if ((mantissa & mask) != 0)
-    return false;
-
-  /* Having filtered unrepresentable values, we may now remove all
-     but the highest 5 bits.  */
-  mantissa >>= point_pos - 5;
-
-  /* We cannot represent the value 0.0, so reject it.  This is handled
-     elsewhere.  */
-  if (mantissa == 0)
-    return false;
-
-  /* Then, as bit 4 is always set, we can mask it off, leaving
-     the mantissa in the range [0, 15].  */
-  mantissa &= ~(1 << 4);
-  gcc_assert (mantissa <= 15);
-
-  /* GCC internally does not use IEEE754-like encoding (where normalized
-     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.cc).
-     Our mantissa values are shifted 4 places to the left relative to
-     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
-     by 5 places to correct for GCC's representation.  */
-  exponent = 5 - exponent;
-
-  return (exponent >= 0 && exponent <= 7);
+  return aarch64_real_float_const_representable_p (r);
 }
 
-/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
-   immediate with a CONST_VECTOR of MODE and WIDTH.  WHICH selects whether to
-   output MOVI/MVNI, ORR or BIC immediate.  */
+/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR, BIC or
+   FMOV immediate with a CONST_VECTOR of MODE and WIDTH.  WHICH selects whether
+   to output MOVI/MVNI, ORR or BIC immediate.  */
 char*
 aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
 				   enum simd_immediate_check which)
diff --git a/gcc/testsuite/gcc.target/aarch64/const_create_using_fmov.c b/gcc/testsuite/gcc.target/aarch64/const_create_using_fmov.c
new file mode 100644
index 0000000000000000000000000000000000000000..e080afed8aa3578660027979335bfc859ca6bc91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/const_create_using_fmov.c
@@ -0,0 +1,87 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv9-a -Ofast" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** g:
+** 	fmov	v0\.4s, 1\.0e\+0
+** 	ret
+*/
+float32x4_t g(){
+    return vdupq_n_f32(1);
+}
+
+/*
+** h:
+** 	fmov	v0\.4s, 1\.0e\+0
+** 	ret
+*/
+uint32x4_t h() {
+    return vreinterpretq_u32_f32(g());
+}
+
+/*
+** f1:
+** 	fmov	v0\.4s, 1\.0e\+0
+** 	ret
+*/
+uint32x4_t f1() {
+    return vdupq_n_u32(0x3f800000);
+}
+
+/*
+** f2:
+** 	fmov	v0\.4s, 1\.5e\+0
+** 	ret
+*/
+uint32x4_t f2() {
+    return vdupq_n_u32(0x3FC00000);
+}
+
+/*
+** f3:
+** 	fmov	v0\.4s, 1\.25e\+0
+** 	ret
+*/
+uint32x4_t f3() {
+    return vdupq_n_u32(0x3FA00000);
+}
+
+/*
+** f4:
+** 	fmov	v0\.2d, 1\.0e\+0
+** 	ret
+*/
+uint64x2_t f4() {
+    return vdupq_n_u64(0x3FF0000000000000);
+}
+
+/*
+** fn4:
+** 	fmov	v0\.2d, -1\.0e\+0
+** 	ret
+*/
+uint64x2_t fn4() {
+    return vdupq_n_u64(0xBFF0000000000000);
+}
+
+/*
+** f5:
+** 	fmov	v0\.8h, 1\.5e\+0
+** 	ret
+*/
+uint16x8_t f5() {
+    return vdupq_n_u16(0x3E00);
+}
+
+/*
+** f6:
+** 	adrp	x0, \.LC0
+** 	ldr	q0, \[x0, #:lo12:\.LC0\]
+** 	ret
+*/
+uint32x4_t f6() {
+    return vdupq_n_u32(0x4f800000);
+}