diff mbox series

[AArch64] ACLE intrinsics for BFCVTN, BFCVTN2 (AArch64 AdvSIMD) and BFCVT (AArch64 FP)

Message ID 67e55764-c543-a350-e685-40a419162c78@arm.com
State New
Headers show
Series [AArch64] ACLE intrinsics for BFCVTN, BFCVTN2 (AArch64 AdvSIMD) and BFCVT (AArch64 FP) | expand

Commit Message

Delia Burduv Dec. 20, 2019, 6:41 p.m. UTC
This patch adds the Armv8.6-a ACLE intrinsics for bfmmla, bfmlalb and 
bfmlalt as part of the BFloat16 extension.
(https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics)
The intrinsics are declared in arm_bf16.h and arm_neon.h and the RTL 
patterns are defined in aarch64-simd.md.
A new test is added to check assembler output.

This patch depends on the two Aarch64 back-end patches. 
(https://gcc.gnu.org/ml/gcc-patches/2019-12/msg01323.html and 
https://gcc.gnu.org/ml/gcc-patches/2019-12/msg01324.html)

Tested for regression on aarch64-none-elf and aarch64_be-none-elf. I 
don't have commit rights, so if this is ok can someone please commit it 
for me?

gcc/ChangeLog:

2019-11-06  Delia Burduv  <delia.burduv@arm.com>

         * config/aarch64/aarch64-simd-builtins.def
           (bfcvtn): New built-in function.
           (bfcvtn_q): New built-in function.
           (bfcvtn2): New built-in function.
           (bfcvt): New built-in function.
         * config/aarch64/aarch64-simd.md
           (aarch64_bfcvtn<q><mode>): New pattern.
           (aarch64_bfcvtn2v8bf): New pattern.
           (aarch64_bfcvtbf): New pattern.
         * config/aarch64/arm_bf16.h (float32_t): New typedef.
           (vcvth_bf16_f32): New intrinsic.
         * config/aarch64/arm_bf16.h (vcvt_bf16_f32): New intrinsic.
           (vcvtq_low_bf16_f32): New intrinsic.
           (vcvtq_high_bf16_f32): New intrinsic.
         * config/aarch64/iterators.md (V4SF_TO_BF): New mode iterator.
           (UNSPEC_BFCVTN): New UNSPEC.
           (UNSPEC_BFCVTN2): New UNSPEC.
           (UNSPEC_BFCVT): New UNSPEC.
         * config/arm/types.md (bf_cvt): New type.


gcc/testsuite/ChangeLog:

2019-11-06  Delia Burduv  <delia.burduv@arm.com>

         * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c: New test.

Comments

Richard Sandiford Dec. 23, 2019, 6:30 p.m. UTC | #1
Some of the comments on the BFMMLA/BFMLA[LT] patch apply here too.

Delia Burduv <Delia.Burduv@arm.com> writes:
> This patch adds the Armv8.6-a ACLE intrinsics for bfmmla, bfmlalb and 
> bfmlalt as part of the BFloat16 extension.

That's the other patch :-)

> [...]
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 55660ae248f4fa75d35ba2949cd4b9d5139ff5f5..ff7a1f5f34a19b05eba48dba96c736dfdfdf7bac 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -7027,3 +7027,32 @@
>    "xtn\t%0.<Vntype>, %1.<Vtype>"
>    [(set_attr "type" "neon_shift_imm_narrow_q")]
>  )
> +
> +;; bfcvtn
> +(define_insn "aarch64_bfcvtn<q><mode>"
> +  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
> +        (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
> +                            UNSPEC_BFCVTN))]
> +  "TARGET_BF16_SIMD"
> +  "bfcvtn\\t%0.4h, %1.4s"
> +  [(set_attr "type" "f_cvt")]
> +)
> +

If I've understood the naming convention correctly, the closest type
seems to be "neon_fp_cvt_narrow_s_q".

> +(define_insn "aarch64_bfcvtn2v8bf"
> +  [(set (match_operand:V8BF 0 "register_operand" "=w")
> +        (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "w")
> +                      (match_operand:V4SF 2 "register_operand" "w")]
> +                      UNSPEC_BFCVTN2))]
> +  "TARGET_BF16_SIMD"
> +  "bfcvtn2\\t%0.8h, %2.4s"
> +  [(set_attr "type" "f_cvt")]
> +)

Same here.

The constraint on operand 1 needs to be "0", otherwise operands 1 and 0
could end up in different registers.  You could test for this using
something like:

bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive,
				   float32x4_t a)
{
  return vcvtq_high_bf16_f32 (inactive, a);
}

which when compiled at -O should produce something like:

/*
**test_bfcvtnq2_untied:
**	mov	v0\.8h, v1\.8h
**	bfcvtn2	v0\.8h, v2\.4s
**	ret
*/

(Completely untested, the code above is probably wrong.)

> +
> +(define_insn "aarch64_bfcvtbf"
> +  [(set (match_operand:BF 0 "register_operand" "=w")
> +        (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
> +                    UNSPEC_BFCVT))]
> +  "TARGET_BF16_SIMD"

I think this just needs the scalar macro rather than *_SIMD.

> +  "bfcvt\\t%h0, %s1"
> +  [(set_attr "type" "f_cvt")]
> +)
> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
> index aedb0972735ce549fac1870bacd1ef3101e8fd26..1b9ab3690d35e153cd4f24b9e3bbb5b4cc4b4f4d 100644
> --- a/gcc/config/aarch64/arm_bf16.h
> +++ b/gcc/config/aarch64/arm_bf16.h
> @@ -34,7 +34,15 @@
>  #ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC
>  
>  typedef __bf16 bfloat16_t;
> -
> +typedef float float32_t;
> +
> +__extension__ extern __inline bfloat16_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vcvth_bf16_f32 \
> +      (float32_t __a)

No need for the line break here.

> +{
> +  return __builtin_aarch64_bfcvtbf (__a);
> +}
>  
>  #endif
>  #pragma GCC pop_options
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index 6cdbf381f0156ed993f03b847228b36ebbdd14f8..120f4b7d8827aee51834e75aeaa6ab8f8451980e 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -34610,6 +34610,35 @@ vrnd64xq_f64 (float64x2_t __a)
>  
>  #include "arm_bf16.h"
>  
> +#pragma GCC push_options
> +#pragma GCC target ("arch=armv8.2-a+bf16")
> +#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC
> +
> +__extension__ extern __inline bfloat16x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vcvt_bf16_f32 (float32x4_t __a)
> +{
> +  return __builtin_aarch64_bfcvtnv4bf (__a);
> +
> +}

Nit: extra blank line.

> +
> +__extension__ extern __inline bfloat16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vcvtq_low_bf16_f32 (float32x4_t __a)
> +{
> +  return __builtin_aarch64_bfcvtn_qv8bf (__a);
> +}
> +
> +__extension__ extern __inline bfloat16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
> +{
> +  return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
> +}
> +
> +#endif
> +#pragma GCC pop_options
> +
>  #pragma GCC pop_options
>  
>  #undef __aarch64_vget_lane_any
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 931166da5e47302afe810498eea9c8c2ab89b9de..f9f0bafb1eca4da42e564224fca1fd43d89f6ed1 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -431,6 +431,9 @@
>  ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements.
>  (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI])
>  
> +;; Bfloat16 modes to which V4SF can be converted
> +(define_mode_iterator V4SF_TO_BF [V4BF V8BF])
> +
>  ;; ------------------------------------------------------------------
>  ;; Unspec enumerations for Advance SIMD. These could well go into
>  ;; aarch64.md but for their use in int_iterators here.
> @@ -673,6 +676,9 @@
>      UNSPEC_UMULHS	; Used in aarch64-sve2.md.
>      UNSPEC_UMULHRS	; Used in aarch64-sve2.md.
>      UNSPEC_ASRD		; Used in aarch64-sve.md.
> +    UNSPEC_BFCVTN	; Used in aarch64-simd.md.
> +    UNSPEC_BFCVTN2	; Used in aarch64-simd.md.
> +    UNSPEC_BFCVT	; Used in aarch64-simd.md.
>  ])
>  
>  ;; ------------------------------------------------------------------
> diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
> index df39522f2ad63a52c910b1a6bcc7aa13aaf5d021..dbcb4d58798d7f51b1b8310cd446c58317d7b50d 100644
> --- a/gcc/config/arm/types.md
> +++ b/gcc/config/arm/types.md
> @@ -1097,7 +1097,8 @@
>    crypto_sm4,\
>    coproc,\
>    tme,\
> -  memtag"
> +  memtag,\
> +  bf_cvt"

This doesn't seem to be used.

> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..ebe5b578c1fa82a6f2a166d55c7dc7e905b87135
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
> @@ -0,0 +1,56 @@
> +/* { dg-do assemble { target { aarch64*-*-* } } } */
> +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
> +/* { dg-add-options arm_v8_2a_bf16_neon } */
> +/* { dg-additional-options "-save-temps" } */
> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
> +
> +#include <arm_neon.h>
> +
> +/*
> +**test_bfcvtn:
> +**	...
> +**	bfcvtn\tv[0-9]+.4h, v[0-9]+.4s
> +**	...
> +*/
> +bfloat16x4_t test_bfcvtn (float32x4_t a)
> +{
> +  return vcvt_bf16_f32 (a);
> +}
> +
> +/*
> +**test_bfcvtnq:
> +**	...
> +**	bfcvtn	v[0-9]+.4h, v[0-9]+.4s
> +**	...
> +*/
> +bfloat16x8_t test_bfcvtnq (float32x4_t a)
> +{
> +  return vcvtq_low_bf16_f32 (a);
> +}
> +
> +/*
> +**test_bfcvtnq2:
> +**	...
> +**	bfcvtn	v[0-9]+.4h, v[0-9]+.4s
> +**	...
> +*/
> +bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a)
> +{
> +  return vcvtq_high_bf16_f32 (inactive, a);
> +}
> +
> +/*
> +**test_bfcvt:
> +**	...
> +**	bfcvt	h[0-9]+, s[0-9]+
> +**	...
> +*/
> +bfloat16_t test_bfcvt (float32_t a)
> +{
> +  return vcvth_bf16_f32 (a);
> +}
> +
> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
> +/* { dg-final { scan-assembler {bfcvt\th[0-9]+, s[0-9]+} } } */

Same comments as for the BFMMLA/BFMLA[BT] tests.

As well as testing all these combinations for the SIMD case,
it would be good to have a direct arm_bf16.h-only test for:

#pragma GCC target "arch=armv8.2-a+bf16+nosimd"

test_bfcvt should still work in that case.

It would also be good to have a test that test_bfcvt reports
an appropriate error if compiled after:

#pragma GCC target "arch=armv8.2-a+nobf16"

Thanks,
Richard
Delia Burduv Jan. 31, 2020, 2:51 p.m. UTC | #2
Sorry for the confusion, what I meant to say was:

This patch adds the Armv8.6-a ACLE intrinsics for bfcvtn, bfcvtn2 and 
bfcvt as part of the BFloat16 extension.
(https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics)
The intrinsics are declared in arm_bf16.h and arm_neon.h and the RTL 
patterns are defined in aarch64-simd.md.

Tested for regression on aarch64-none-elf and aarch64_be-none-elf. I 
don't have commit rights, so if this is ok can someone please commit it 
for me?

Here is the updated patch.

Thank you,
Delia


gcc/ChangeLog:

2019-11-06  Delia Burduv  <delia.burduv@arm.com>

         * config/aarch64/aarch64-simd-builtins.def
         (bfcvtn): New built-in function.
         (bfcvtn_q): New built-in function.
         (bfcvtn2): New built-in function.
         (bfcvt): New built-in function.
         * config/aarch64/aarch64-simd.md
         (aarch64_bfcvtn<q><mode>): New pattern.
         (aarch64_bfcvtn2v8bf): New pattern.
         (aarch64_bfcvtbf): New pattern.
         * config/aarch64/arm_bf16.h (float32_t): New typedef.
         (vcvth_bf16_f32): New intrinsic.
         * config/aarch64/arm_bf16.h (vcvt_bf16_f32): New intrinsic.
         (vcvtq_low_bf16_f32): New intrinsic.
         (vcvtq_high_bf16_f32): New intrinsic.
         * config/aarch64/iterators.md (V4SF_TO_BF): New mode iterator.
         (UNSPEC_BFCVTN): New UNSPEC.
         (UNSPEC_BFCVTN2): New UNSPEC.
         (UNSPEC_BFCVT): New UNSPEC.
         * config/arm/types.md (bf_cvt): New type.


gcc/testsuite/ChangeLog:

2020-01-31  Delia Burduv  <delia.burduv@arm.com>

         * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c: New
	test.
	* gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c: New
	test.
	* gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c: New
	test.
	* gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: New
	test.

On 12/23/19 6:30 PM, Richard Sandiford wrote:
> Some of the comments on the BFMMLA/BFMLA[LT] patch apply here too.
> 
> Delia Burduv <Delia.Burduv@arm.com> writes:
>> This patch adds the Armv8.6-a ACLE intrinsics for bfmmla, bfmlalb and
>> bfmlalt as part of the BFloat16 extension.
> 
> That's the other patch :-)
> 
>> [...]
>> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
>> index 55660ae248f4fa75d35ba2949cd4b9d5139ff5f5..ff7a1f5f34a19b05eba48dba96c736dfdfdf7bac 100644
>> --- a/gcc/config/aarch64/aarch64-simd.md
>> +++ b/gcc/config/aarch64/aarch64-simd.md
>> @@ -7027,3 +7027,32 @@
>>     "xtn\t%0.<Vntype>, %1.<Vtype>"
>>     [(set_attr "type" "neon_shift_imm_narrow_q")]
>>   )
>> +
>> +;; bfcvtn
>> +(define_insn "aarch64_bfcvtn<q><mode>"
>> +  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
>> +        (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
>> +                            UNSPEC_BFCVTN))]
>> +  "TARGET_BF16_SIMD"
>> +  "bfcvtn\\t%0.4h, %1.4s"
>> +  [(set_attr "type" "f_cvt")]
>> +)
>> +
> 
> If I've understood the naming convention correctly, the closest type
> seems to be "neon_fp_cvt_narrow_s_q".
> 
>> +(define_insn "aarch64_bfcvtn2v8bf"
>> +  [(set (match_operand:V8BF 0 "register_operand" "=w")
>> +        (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "w")
>> +                      (match_operand:V4SF 2 "register_operand" "w")]
>> +                      UNSPEC_BFCVTN2))]
>> +  "TARGET_BF16_SIMD"
>> +  "bfcvtn2\\t%0.8h, %2.4s"
>> +  [(set_attr "type" "f_cvt")]
>> +)
> 
> Same here.
> 
> The constraint on operand 1 needs to be "0", otherwise operands 1 and 0
> could end up in different registers.  You could test for this using
> something like:
> 
> bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive,
> 				   float32x4_t a)
> {
>    return vcvtq_high_bf16_f32 (inactive, a);
> }
> 
> which when compiled at -O should produce something like:
> 
> /*
> **test_bfcvtnq2_untied:
> **	mov	v0\.8h, v1\.8h
> **	bfcvtn2	v0\.8h, v2\.4s
> **	ret
> */
> 
> (Completely untested, the code above is probably wrong.)
> 
>> +
>> +(define_insn "aarch64_bfcvtbf"
>> +  [(set (match_operand:BF 0 "register_operand" "=w")
>> +        (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
>> +                    UNSPEC_BFCVT))]
>> +  "TARGET_BF16_SIMD"
> 
> I think this just needs the scalar macro rather than *_SIMD.
> 
>> +  "bfcvt\\t%h0, %s1"
>> +  [(set_attr "type" "f_cvt")]
>> +)
>> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
>> index aedb0972735ce549fac1870bacd1ef3101e8fd26..1b9ab3690d35e153cd4f24b9e3bbb5b4cc4b4f4d 100644
>> --- a/gcc/config/aarch64/arm_bf16.h
>> +++ b/gcc/config/aarch64/arm_bf16.h
>> @@ -34,7 +34,15 @@
>>   #ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC
>>   
>>   typedef __bf16 bfloat16_t;
>> -
>> +typedef float float32_t;
>> +
>> +__extension__ extern __inline bfloat16_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vcvth_bf16_f32 \
>> +      (float32_t __a)
> 
> No need for the line break here.
> 
>> +{
>> +  return __builtin_aarch64_bfcvtbf (__a);
>> +}
>>   
>>   #endif
>>   #pragma GCC pop_options
>> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
>> index 6cdbf381f0156ed993f03b847228b36ebbdd14f8..120f4b7d8827aee51834e75aeaa6ab8f8451980e 100644
>> --- a/gcc/config/aarch64/arm_neon.h
>> +++ b/gcc/config/aarch64/arm_neon.h
>> @@ -34610,6 +34610,35 @@ vrnd64xq_f64 (float64x2_t __a)
>>   
>>   #include "arm_bf16.h"
>>   
>> +#pragma GCC push_options
>> +#pragma GCC target ("arch=armv8.2-a+bf16")
>> +#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC
>> +
>> +__extension__ extern __inline bfloat16x4_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vcvt_bf16_f32 (float32x4_t __a)
>> +{
>> +  return __builtin_aarch64_bfcvtnv4bf (__a);
>> +
>> +}
> 
> Nit: extra blank line.
> 
>> +
>> +__extension__ extern __inline bfloat16x8_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vcvtq_low_bf16_f32 (float32x4_t __a)
>> +{
>> +  return __builtin_aarch64_bfcvtn_qv8bf (__a);
>> +}
>> +
>> +__extension__ extern __inline bfloat16x8_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
>> +{
>> +  return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
>> +}
>> +
>> +#endif
>> +#pragma GCC pop_options
>> +
>>   #pragma GCC pop_options
>>   
>>   #undef __aarch64_vget_lane_any
>> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
>> index 931166da5e47302afe810498eea9c8c2ab89b9de..f9f0bafb1eca4da42e564224fca1fd43d89f6ed1 100644
>> --- a/gcc/config/aarch64/iterators.md
>> +++ b/gcc/config/aarch64/iterators.md
>> @@ -431,6 +431,9 @@
>>   ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements.
>>   (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI])
>>   
>> +;; Bfloat16 modes to which V4SF can be converted
>> +(define_mode_iterator V4SF_TO_BF [V4BF V8BF])
>> +
>>   ;; ------------------------------------------------------------------
>>   ;; Unspec enumerations for Advance SIMD. These could well go into
>>   ;; aarch64.md but for their use in int_iterators here.
>> @@ -673,6 +676,9 @@
>>       UNSPEC_UMULHS	; Used in aarch64-sve2.md.
>>       UNSPEC_UMULHRS	; Used in aarch64-sve2.md.
>>       UNSPEC_ASRD		; Used in aarch64-sve.md.
>> +    UNSPEC_BFCVTN	; Used in aarch64-simd.md.
>> +    UNSPEC_BFCVTN2	; Used in aarch64-simd.md.
>> +    UNSPEC_BFCVT	; Used in aarch64-simd.md.
>>   ])
>>   
>>   ;; ------------------------------------------------------------------
>> diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
>> index df39522f2ad63a52c910b1a6bcc7aa13aaf5d021..dbcb4d58798d7f51b1b8310cd446c58317d7b50d 100644
>> --- a/gcc/config/arm/types.md
>> +++ b/gcc/config/arm/types.md
>> @@ -1097,7 +1097,8 @@
>>     crypto_sm4,\
>>     coproc,\
>>     tme,\
>> -  memtag"
>> +  memtag,\
>> +  bf_cvt"
> 
> This doesn't seem to be used.
> 
>> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..ebe5b578c1fa82a6f2a166d55c7dc7e905b87135
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
>> @@ -0,0 +1,56 @@
>> +/* { dg-do assemble { target { aarch64*-*-* } } } */
>> +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
>> +/* { dg-add-options arm_v8_2a_bf16_neon } */
>> +/* { dg-additional-options "-save-temps" } */
>> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
>> +
>> +#include <arm_neon.h>
>> +
>> +/*
>> +**test_bfcvtn:
>> +**	...
>> +**	bfcvtn\tv[0-9]+.4h, v[0-9]+.4s
>> +**	...
>> +*/
>> +bfloat16x4_t test_bfcvtn (float32x4_t a)
>> +{
>> +  return vcvt_bf16_f32 (a);
>> +}
>> +
>> +/*
>> +**test_bfcvtnq:
>> +**	...
>> +**	bfcvtn	v[0-9]+.4h, v[0-9]+.4s
>> +**	...
>> +*/
>> +bfloat16x8_t test_bfcvtnq (float32x4_t a)
>> +{
>> +  return vcvtq_low_bf16_f32 (a);
>> +}
>> +
>> +/*
>> +**test_bfcvtnq2:
>> +**	...
>> +**	bfcvtn	v[0-9]+.4h, v[0-9]+.4s
>> +**	...
>> +*/
>> +bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a)
>> +{
>> +  return vcvtq_high_bf16_f32 (inactive, a);
>> +}
>> +
>> +/*
>> +**test_bfcvt:
>> +**	...
>> +**	bfcvt	h[0-9]+, s[0-9]+
>> +**	...
>> +*/
>> +bfloat16_t test_bfcvt (float32_t a)
>> +{
>> +  return vcvth_bf16_f32 (a);
>> +}
>> +
>> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
>> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
>> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
>> +/* { dg-final { scan-assembler {bfcvt\th[0-9]+, s[0-9]+} } } */
> 
> Same comments as for the BFMMLA/BFMLA[BT] tests.
> 
> As well as testing all these combinations for the SIMD case,
> it would be good to have a direct arm_bf16.h-only test for:
> 
> #pragma GCC target "arch=armv8.2-a+bf16+nosimd"
> 
> test_bfcvt should still work in that case.
> 
> It would also be good to have a test that test_bfcvt reports
> an appropriate error if compiled after:
> 
> #pragma GCC target "arch=armv8.2-a+nobf16"
> 
> Thanks,
> Richard
>
Richard Sandiford Jan. 31, 2020, 4:23 p.m. UTC | #3
Delia Burduv <Delia.Burduv@arm.com> writes:
> [...]
> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
> index 3759c0d1cb449a7f0125cc2a1433127564d66622..fb2150e1d60a590046e2c034422021aafc721e23 100644
> --- a/gcc/config/aarch64/arm_bf16.h
> +++ b/gcc/config/aarch64/arm_bf16.h
> @@ -28,5 +28,13 @@
>  #define _AARCH64_BF16_H_
>  
>  typedef __bf16 bfloat16_t;
> +typedef float float32_t;
> +
> +__extension__ extern __inline bfloat16_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vcvth_bf16_f32 (float32_t __a)
> +{
> +  return __builtin_aarch64_bfcvtbf (__a);
> +}

Sorry for not noticing last time, but this should be wrapped in:

#pragma GCC push_options
#pragma GCC target ("+nothing+bf16")

...

#pragma GCC pop_options

otherwise I think calling this function without +bf16 would trigger
an internal compiler error.  It would be good to have a test that
that doesn't happen (something along the lines of bfcvt-nobf16.c,
but for the scalar case).

> [...]
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82fd8ed6f9f247c1a9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
> @@ -0,0 +1,48 @@
> +/* { dg-do assemble { target { aarch64*-*-* } } } */

The { target ... } isn't necessary here.  (Missed that in the other
review, sorry.)

> +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
> +/* { dg-add-options arm_v8_2a_bf16_neon } */
> +/* { dg-additional-options "-save-temps" } */
> +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
> +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
> +
> +#include <arm_neon.h>
> +
> +/*
> +**test_bfcvtn:
> +**     bfcvtn\tv0.4h, v0.4s

Like with the other review, I think the literal tab you had in the
original patch looks better than \t.

> [...]
> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..8d7dffe16275de60e884c449afa0fea0b1af6081
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
> @@ -0,0 +1,15 @@
> +/* { dg-do assemble { target { aarch64*-*-* } } } */

This needs:

/* { dg-require-effective-target aarch64_asm_bf16_ok } */

(Doesn't exist yet, but I hope to post a patch soon.)

Looks good otherwise, thanks.

Richard
Tamar Christina Feb. 18, 2020, 1:24 p.m. UTC | #4
Hi Richard,

> 0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82f
> d8e
> > d6f9f247c1a9
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compil
> > +++ e.c
> > @@ -0,0 +1,48 @@
> > +/* { dg-do assemble { target { aarch64*-*-* } } } */
> 
> The { target ... } isn't necessary here.  (Missed that in the other review, sorry.)
> 

Why not? The advsimd-intrinsics tests are shared between both AArch32 and AArch64.

Tamar.

> > +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
> > +/* { dg-add-options arm_v8_2a_bf16_neon } */
> > +/* { dg-additional-options "-save-temps" } */
> > +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
> > +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
> > +
> > +#include <arm_neon.h>
> > +
> > +/*
> > +**test_bfcvtn:
> > +**     bfcvtn\tv0.4h, v0.4s
> 
> Like with the other review, I think the literal tab you had in the original patch
> looks better than \t.
> 
> > [...]
> > diff --git
> > a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
> > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..8d7dffe16275de60e884c449af
> a0
> > fea0b1af6081
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd
> > +++ .c
> > @@ -0,0 +1,15 @@
> > +/* { dg-do assemble { target { aarch64*-*-* } } } */
> 
> This needs:
> 
> /* { dg-require-effective-target aarch64_asm_bf16_ok } */
> 
> (Doesn't exist yet, but I hope to post a patch soon.)
> 
> Looks good otherwise, thanks.
> 
> Richard
Richard Sandiford Feb. 18, 2020, 1:51 p.m. UTC | #5
Tamar Christina <Tamar.Christina@arm.com> writes:
> Hi Richard,
>
>> 0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82f
>> d8e
>> > d6f9f247c1a9
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compil
>> > +++ e.c
>> > @@ -0,0 +1,48 @@
>> > +/* { dg-do assemble { target { aarch64*-*-* } } } */
>> 
>> The { target ... } isn't necessary here.  (Missed that in the other review, sorry.)
>> 
>
> Why not? The advsimd-intrinsics tests are shared between both AArch32 and AArch64.

Ah, so they are.  Think it would better to move them to a new
gcc.target/arm-common or something in that case.  Tests in
gcc.target/aarch64 really ought to be specific to aarch64.

Thanks,
Richard

>
> Tamar.
>
>> > +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
>> > +/* { dg-add-options arm_v8_2a_bf16_neon } */
>> > +/* { dg-additional-options "-save-temps" } */
>> > +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
>> > +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
>> > +
>> > +#include <arm_neon.h>
>> > +
>> > +/*
>> > +**test_bfcvtn:
>> > +**     bfcvtn\tv0.4h, v0.4s
>> 
>> Like with the other review, I think the literal tab you had in the original patch
>> looks better than \t.
>> 
>> > [...]
>> > diff --git
>> > a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
>> > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
>> > new file mode 100644
>> > index
>> >
>> 0000000000000000000000000000000000000000..8d7dffe16275de60e884c449af
>> a0
>> > fea0b1af6081
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd
>> > +++ .c
>> > @@ -0,0 +1,15 @@
>> > +/* { dg-do assemble { target { aarch64*-*-* } } } */
>> 
>> This needs:
>> 
>> /* { dg-require-effective-target aarch64_asm_bf16_ok } */
>> 
>> (Doesn't exist yet, but I hope to post a patch soon.)
>> 
>> Looks good otherwise, thanks.
>> 
>> Richard
Delia Burduv March 3, 2020, 4:47 p.m. UTC | #6
Hi,

Here is the latest version of the patch.

On 2/18/20 1:51 PM, Richard Sandiford wrote:
> Tamar Christina <Tamar.Christina@arm.com> writes:
>> Hi Richard,
>>
>>> 0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82f
>>> d8e
>>>> d6f9f247c1a9
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compil
>>>> +++ e.c
>>>> @@ -0,0 +1,48 @@
>>>> +/* { dg-do assemble { target { aarch64*-*-* } } } */
>>>
>>> The { target ... } isn't necessary here.  (Missed that in the other review, sorry.)
>>>
>>
>> Why not? The advsimd-intrinsics tests are shared between both AArch32 and AArch64.
> 
> Ah, so they are.  Think it would better to move them to a new
> gcc.target/arm-common or something in that case.  Tests in
> gcc.target/aarch64 really ought to be specific to aarch64.
> 
> Thanks,
> Richard
> 

I left the advsimd-intrinsics tests shared since creating a new 
gcc.target/arm-common should probably be a separate patch.

Let me know if this patch is ok. And if it is, can someone please commit 
it for me?

Thanks,
Delia

>>
>> Tamar.
>>
>>>> +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
>>>> +/* { dg-add-options arm_v8_2a_bf16_neon } */
>>>> +/* { dg-additional-options "-save-temps" } */
>>>> +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
>>>> +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
>>>> +
>>>> +#include <arm_neon.h>
>>>> +
>>>> +/*
>>>> +**test_bfcvtn:
>>>> +**     bfcvtn\tv0.4h, v0.4s
>>>
>>> Like with the other review, I think the literal tab you had in the original patch
>>> looks better than \t.
>>>
>>>> [...]
>>>> diff --git
>>>> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
>>>> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
>>>> new file mode 100644
>>>> index
>>>>
>>> 0000000000000000000000000000000000000000..8d7dffe16275de60e884c449af
>>> a0
>>>> fea0b1af6081
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd
>>>> +++ .c
>>>> @@ -0,0 +1,15 @@
>>>> +/* { dg-do assemble { target { aarch64*-*-* } } } */
>>>
>>> This needs:
>>>
>>> /* { dg-require-effective-target aarch64_asm_bf16_ok } */
>>>
>>> (Doesn't exist yet, but I hope to post a patch soon.)
>>>
>>> Looks good otherwise, thanks.
>>>
>>> Richard
Richard Sandiford March 5, 2020, 11:06 a.m. UTC | #7
Hi,

Thanks for the update and sorry for the slow reply.

When I try the patch locally I get:

FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O0  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O1  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O3 -g  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -Og -g  (test for excess errors)
FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -Os  (test for excess errors)

I think that's because:

Delia Burduv <delia.burduv@arm.com> writes:
> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
> index 3759c0d1cb449a7f0125cc2a1433127564d66622..fa7080c2953bc3254f01d842a8afef917d469080 100644
> --- a/gcc/config/aarch64/arm_bf16.h
> +++ b/gcc/config/aarch64/arm_bf16.h
> @@ -27,6 +27,19 @@
>  #ifndef _AARCH64_BF16_H_
>  #define _AARCH64_BF16_H_
>  
> +#pragma GCC push_options
> +#pragma GCC target ("+nothing+bf16")
> +
>  typedef __bf16 bfloat16_t;
> +typedef float float32_t;
> +
> +__extension__ extern __inline bfloat16_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +vcvth_bf16_f32 (float32_t __a)
> +{
> +  return __builtin_aarch64_bfcvtbf (__a);
> +}
> +
> +#pragma GCC pop_options

"+bf16" implicitly enables "+simd", so functions guarded with
"+nothing+bf16" are only available when "+simd" is available.
I think we want "+nothing+bf16+nosimd" instead.  (Haven't tested
that though.)

Very minor, but: it might be clearer to leave the typedefs outside
of the #pragma block.  It doesn't make any difference to the behaviour,
but it emphasises that the typedefs really are available unconditionally.

Looks ready to go otherwise.

Thanks,
Richard
Delia Burduv March 5, 2020, 5:47 p.m. UTC | #8
Hi,

Here is the latest version of the  patch. That test should now work.

Thanks,
Delia

On 3/5/20 11:06 AM, Richard Sandiford wrote:
> Hi,
> 
> Thanks for the update and sorry for the slow reply.
> 
> When I try the patch locally I get:
> 
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O0  (test for excess errors)
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O1  (test for excess errors)
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2  (test for excess errors)
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2 -flto -fno-use-linker-plugin -flto-partition=none  (test for excess errors)
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects  (test for excess errors)
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -O3 -g  (test for excess errors)
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -Og -g  (test for excess errors)
> FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c   -Os  (test for excess errors)
> 
> I think that's because:
> 
> Delia Burduv <delia.burduv@arm.com> writes:
>> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
>> index 3759c0d1cb449a7f0125cc2a1433127564d66622..fa7080c2953bc3254f01d842a8afef917d469080 100644
>> --- a/gcc/config/aarch64/arm_bf16.h
>> +++ b/gcc/config/aarch64/arm_bf16.h
>> @@ -27,6 +27,19 @@
>>   #ifndef _AARCH64_BF16_H_
>>   #define _AARCH64_BF16_H_
>>   
>> +#pragma GCC push_options
>> +#pragma GCC target ("+nothing+bf16")
>> +
>>   typedef __bf16 bfloat16_t;
>> +typedef float float32_t;
>> +
>> +__extension__ extern __inline bfloat16_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vcvth_bf16_f32 (float32_t __a)
>> +{
>> +  return __builtin_aarch64_bfcvtbf (__a);
>> +}
>> +
>> +#pragma GCC pop_options
> 
> "+bf16" implicitly enables "+simd", so functions guarded with
> "+nothing+bf16" are only available when "+simd" is available.
> I think we want "+nothing+bf16+nosimd" instead.  (Haven't tested
> that though.)
> 
> Very minor, but: it might be clearer to leave the typedefs outside
> of the #pragma block.  It doesn't make any difference to the behaviour,
> but it emphasises that the typedefs really are available unconditionally.
> 
> Looks ready to go otherwise.
> 
> Thanks,
> Richard
>
Richard Sandiford March 6, 2020, 9:54 a.m. UTC | #9
Delia Burduv <delia.burduv@arm.com> writes:
> Hi,
>
> Here is the latest version of the  patch. That test should now work.

Thanks, pushed.

Richard
Vaseeharan Vinayagamoorthy March 10, 2020, 6:27 p.m. UTC | #10
Hi,

I think this commit causes a failure on aarch64-none-elf due to a DejaGnu
typo in gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c.

{ dg-final { check-function-bodies "**" "" "-O[^0]" } }

I think the square brackets need to be escaped or use {-O[^0]}.

Regards
Vasee

Fri, Mar 06, 2020 at 09:54:07AM +0000, Richard Sandiford wrote:
> Delia Burduv <delia.burduv@arm.com> writes:
> > Hi,
> >
> > Here is the latest version of the  patch. That test should now work.
> 
> Thanks, pushed.
> 
> Richard
Richard Sandiford March 11, 2020, 4:37 p.m. UTC | #11
Vasee Vinayagamoorthy <vaseeharan.vinayagamoorthy@arm.com> writes:
> Hi,
>
> I think this commit causes a failure on aarch64-none-elf due to a DejaGnu
> typo in gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c.
>
> { dg-final { check-function-bodies "**" "" "-O[^0]" } }
>
> I think the square brackets need to be escaped or use {-O[^0]}.

Gah, this is my fault.  I'd noticed that the line previously used
"-DCHECK_ASM", which meant that the test would never be performed
(because nothing ever adds -DCHECK_ASM for this test harness).
I should have noticed that during previous review cycles and
didn't want to force another review round over it, so I made
the change locally.  But then I compounded the mistake by messing
up the testing somehow, and also forgetting about the change when
doing the commit. :-(

Really sorry about that, and sorry especially to Delia for messing
up her patch.  It sounds from off-list discussion like you have
a fix in the works, so I'll defer to that.

Thanks,
Richard

>
> Regards
> Vasee
>
> Fri, Mar 06, 2020 at 09:54:07AM +0000, Richard Sandiford wrote:
>> Delia Burduv <delia.burduv@arm.com> writes:
>> > Hi,
>> >
>> > Here is the latest version of the  patch. That test should now work.
>> 
>> Thanks, pushed.
>> 
>> Richard
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index f4ca35a59704c761fe2ac2b6d401fff7c8aba80d..30a425bd3aec121e78f269f44e188bdb8d39e75f 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -682,3 +682,9 @@ 
   BUILTIN_VSFDF (UNOP, frint32x, 0)
   BUILTIN_VSFDF (UNOP, frint64z, 0)
   BUILTIN_VSFDF (UNOP, frint64x, 0)
+
+  /* Implemented by aarch64_bfcvtn{q}{2}<mode>  */
+  VAR1 (UNOP, bfcvtn, 0, v4bf)
+  VAR1 (UNOP, bfcvtn_q, 0, v8bf)
+  VAR1 (BINOP, bfcvtn2, 0, v8bf)
+  VAR1 (UNOP, bfcvt, 0, bf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 55660ae248f4fa75d35ba2949cd4b9d5139ff5f5..ff7a1f5f34a19b05eba48dba96c736dfdfdf7bac 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7027,3 +7027,32 @@ 
   "xtn\t%0.<Vntype>, %1.<Vtype>"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
+
+;; bfcvtn
+(define_insn "aarch64_bfcvtn<q><mode>"
+  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
+        (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
+                            UNSPEC_BFCVTN))]
+  "TARGET_BF16_SIMD"
+  "bfcvtn\\t%0.4h, %1.4s"
+  [(set_attr "type" "f_cvt")]
+)
+
+(define_insn "aarch64_bfcvtn2v8bf"
+  [(set (match_operand:V8BF 0 "register_operand" "=w")
+        (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "w")
+                      (match_operand:V4SF 2 "register_operand" "w")]
+                      UNSPEC_BFCVTN2))]
+  "TARGET_BF16_SIMD"
+  "bfcvtn2\\t%0.8h, %2.4s"
+  [(set_attr "type" "f_cvt")]
+)
+
+(define_insn "aarch64_bfcvtbf"
+  [(set (match_operand:BF 0 "register_operand" "=w")
+        (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
+                    UNSPEC_BFCVT))]
+  "TARGET_BF16_SIMD"
+  "bfcvt\\t%h0, %s1"
+  [(set_attr "type" "f_cvt")]
+)
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
index aedb0972735ce549fac1870bacd1ef3101e8fd26..1b9ab3690d35e153cd4f24b9e3bbb5b4cc4b4f4d 100644
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -34,7 +34,15 @@ 
 #ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC
 
 typedef __bf16 bfloat16_t;
-
+typedef float float32_t;
+
+__extension__ extern __inline bfloat16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvth_bf16_f32 \
+      (float32_t __a)
+{
+  return __builtin_aarch64_bfcvtbf (__a);
+}
 
 #endif
 #pragma GCC pop_options
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 6cdbf381f0156ed993f03b847228b36ebbdd14f8..120f4b7d8827aee51834e75aeaa6ab8f8451980e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -34610,6 +34610,35 @@  vrnd64xq_f64 (float64x2_t __a)
 
 #include "arm_bf16.h"
 
+#pragma GCC push_options
+#pragma GCC target ("arch=armv8.2-a+bf16")
+#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC
+
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvt_bf16_f32 (float32x4_t __a)
+{
+  return __builtin_aarch64_bfcvtnv4bf (__a);
+
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_low_bf16_f32 (float32x4_t __a)
+{
+  return __builtin_aarch64_bfcvtn_qv8bf (__a);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
+{
+  return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
+}
+
+#endif
+#pragma GCC pop_options
+
 #pragma GCC pop_options
 
 #undef __aarch64_vget_lane_any
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 931166da5e47302afe810498eea9c8c2ab89b9de..f9f0bafb1eca4da42e564224fca1fd43d89f6ed1 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -431,6 +431,9 @@ 
 ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements.
 (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI])
 
+;; Bfloat16 modes to which V4SF can be converted
+(define_mode_iterator V4SF_TO_BF [V4BF V8BF])
+
 ;; ------------------------------------------------------------------
 ;; Unspec enumerations for Advance SIMD. These could well go into
 ;; aarch64.md but for their use in int_iterators here.
@@ -673,6 +676,9 @@ 
     UNSPEC_UMULHS	; Used in aarch64-sve2.md.
     UNSPEC_UMULHRS	; Used in aarch64-sve2.md.
     UNSPEC_ASRD		; Used in aarch64-sve.md.
+    UNSPEC_BFCVTN	; Used in aarch64-simd.md.
+    UNSPEC_BFCVTN2	; Used in aarch64-simd.md.
+    UNSPEC_BFCVT	; Used in aarch64-simd.md.
 ])
 
 ;; ------------------------------------------------------------------
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index df39522f2ad63a52c910b1a6bcc7aa13aaf5d021..dbcb4d58798d7f51b1b8310cd446c58317d7b50d 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -1097,7 +1097,8 @@ 
   crypto_sm4,\
   coproc,\
   tme,\
-  memtag"
+  memtag,\
+  bf_cvt"
    (const_string "untyped"))
 
 ; Is this an (integer side) multiply with a 32-bit (or smaller) result?
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
new file mode 100644
index 0000000000000000000000000000000000000000..ebe5b578c1fa82a6f2a166d55c7dc7e905b87135
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
@@ -0,0 +1,56 @@ 
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_bfcvtn:
+**	...
+**	bfcvtn\tv[0-9]+.4h, v[0-9]+.4s
+**	...
+*/
+bfloat16x4_t test_bfcvtn (float32x4_t a)
+{
+  return vcvt_bf16_f32 (a);
+}
+
+/*
+**test_bfcvtnq:
+**	...
+**	bfcvtn	v[0-9]+.4h, v[0-9]+.4s
+**	...
+*/
+bfloat16x8_t test_bfcvtnq (float32x4_t a)
+{
+  return vcvtq_low_bf16_f32 (a);
+}
+
+/*
+**test_bfcvtnq2:
+**	...
+**	bfcvtn	v[0-9]+.4h, v[0-9]+.4s
+**	...
+*/
+bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a)
+{
+  return vcvtq_high_bf16_f32 (inactive, a);
+}
+
+/*
+**test_bfcvt:
+**	...
+**	bfcvt	h[0-9]+, s[0-9]+
+**	...
+*/
+bfloat16_t test_bfcvt (float32_t a)
+{
+  return vcvth_bf16_f32 (a);
+}
+
+/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
+/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
+/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
+/* { dg-final { scan-assembler {bfcvt\th[0-9]+, s[0-9]+} } } */