Message ID | 67e55764-c543-a350-e685-40a419162c78@arm.com |
---|---|
State | New |
Headers | show |
Series | [AArch64] ACLE intrinsics for BFCVTN, BFCVTN2 (AArch64 AdvSIMD) and BFCVT (AArch64 FP) | expand |
Some of the comments on the BFMMLA/BFMLA[LT] patch apply here too. Delia Burduv <Delia.Burduv@arm.com> writes: > This patch adds the Armv8.6-a ACLE intrinsics for bfmmla, bfmlalb and > bfmlalt as part of the BFloat16 extension. That's the other patch :-) > [...] > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md > index 55660ae248f4fa75d35ba2949cd4b9d5139ff5f5..ff7a1f5f34a19b05eba48dba96c736dfdfdf7bac 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -7027,3 +7027,32 @@ > "xtn\t%0.<Vntype>, %1.<Vtype>" > [(set_attr "type" "neon_shift_imm_narrow_q")] > ) > + > +;; bfcvtn > +(define_insn "aarch64_bfcvtn<q><mode>" > + [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w") > + (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")] > + UNSPEC_BFCVTN))] > + "TARGET_BF16_SIMD" > + "bfcvtn\\t%0.4h, %1.4s" > + [(set_attr "type" "f_cvt")] > +) > + If I've understood the naming convention correctly, the closest type seems to be "neon_fp_cvt_narrow_s_q". > +(define_insn "aarch64_bfcvtn2v8bf" > + [(set (match_operand:V8BF 0 "register_operand" "=w") > + (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "w") > + (match_operand:V4SF 2 "register_operand" "w")] > + UNSPEC_BFCVTN2))] > + "TARGET_BF16_SIMD" > + "bfcvtn2\\t%0.8h, %2.4s" > + [(set_attr "type" "f_cvt")] > +) Same here. The constraint on operand 1 needs to be "0", otherwise operands 1 and 0 could end up in different registers. You could test for this using something like: bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive, float32x4_t a) { return vcvtq_high_bf16_f32 (inactive, a); } which when compiled at -O should produce something like: /* **test_bfcvtnq2_untied: ** mov v0\.8h, v1\.8h ** bfcvtn2 v0\.8h, v2\.4s ** ret */ (Completely untested, the code above is probably wrong.) > + > +(define_insn "aarch64_bfcvtbf" > + [(set (match_operand:BF 0 "register_operand" "=w") > + (unspec:BF [(match_operand:SF 1 "register_operand" "w")] > + UNSPEC_BFCVT))] > + "TARGET_BF16_SIMD" I think this just needs the scalar macro rather than *_SIMD. > + "bfcvt\\t%h0, %s1" > + [(set_attr "type" "f_cvt")] > +) > diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h > index aedb0972735ce549fac1870bacd1ef3101e8fd26..1b9ab3690d35e153cd4f24b9e3bbb5b4cc4b4f4d 100644 > --- a/gcc/config/aarch64/arm_bf16.h > +++ b/gcc/config/aarch64/arm_bf16.h > @@ -34,7 +34,15 @@ > #ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC > > typedef __bf16 bfloat16_t; > - > +typedef float float32_t; > + > +__extension__ extern __inline bfloat16_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vcvth_bf16_f32 \ > + (float32_t __a) No need for the line break here. > +{ > + return __builtin_aarch64_bfcvtbf (__a); > +} > > #endif > #pragma GCC pop_options > diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h > index 6cdbf381f0156ed993f03b847228b36ebbdd14f8..120f4b7d8827aee51834e75aeaa6ab8f8451980e 100644 > --- a/gcc/config/aarch64/arm_neon.h > +++ b/gcc/config/aarch64/arm_neon.h > @@ -34610,6 +34610,35 @@ vrnd64xq_f64 (float64x2_t __a) > > #include "arm_bf16.h" > > +#pragma GCC push_options > +#pragma GCC target ("arch=armv8.2-a+bf16") > +#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC > + > +__extension__ extern __inline bfloat16x4_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vcvt_bf16_f32 (float32x4_t __a) > +{ > + return __builtin_aarch64_bfcvtnv4bf (__a); > + > +} Nit: extra blank line. > + > +__extension__ extern __inline bfloat16x8_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vcvtq_low_bf16_f32 (float32x4_t __a) > +{ > + return __builtin_aarch64_bfcvtn_qv8bf (__a); > +} > + > +__extension__ extern __inline bfloat16x8_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a) > +{ > + return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a); > +} > + > +#endif > +#pragma GCC pop_options > + > #pragma GCC pop_options > > #undef __aarch64_vget_lane_any > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index 931166da5e47302afe810498eea9c8c2ab89b9de..f9f0bafb1eca4da42e564224fca1fd43d89f6ed1 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -431,6 +431,9 @@ > ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements. > (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI]) > > +;; Bfloat16 modes to which V4SF can be converted > +(define_mode_iterator V4SF_TO_BF [V4BF V8BF]) > + > ;; ------------------------------------------------------------------ > ;; Unspec enumerations for Advance SIMD. These could well go into > ;; aarch64.md but for their use in int_iterators here. > @@ -673,6 +676,9 @@ > UNSPEC_UMULHS ; Used in aarch64-sve2.md. > UNSPEC_UMULHRS ; Used in aarch64-sve2.md. > UNSPEC_ASRD ; Used in aarch64-sve.md. > + UNSPEC_BFCVTN ; Used in aarch64-simd.md. > + UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. > + UNSPEC_BFCVT ; Used in aarch64-simd.md. > ]) > > ;; ------------------------------------------------------------------ > diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md > index df39522f2ad63a52c910b1a6bcc7aa13aaf5d021..dbcb4d58798d7f51b1b8310cd446c58317d7b50d 100644 > --- a/gcc/config/arm/types.md > +++ b/gcc/config/arm/types.md > @@ -1097,7 +1097,8 @@ > crypto_sm4,\ > coproc,\ > tme,\ > - memtag" > + memtag,\ > + bf_cvt" This doesn't seem to be used. > diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c > new file mode 100644 > index 0000000000000000000000000000000000000000..ebe5b578c1fa82a6f2a166d55c7dc7e905b87135 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c > @@ -0,0 +1,56 @@ > +/* { dg-do assemble { target { aarch64*-*-* } } } */ > +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ > +/* { dg-add-options arm_v8_2a_bf16_neon } */ > +/* { dg-additional-options "-save-temps" } */ > +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ > + > +#include <arm_neon.h> > + > +/* > +**test_bfcvtn: > +** ... > +** bfcvtn\tv[0-9]+.4h, v[0-9]+.4s > +** ... > +*/ > +bfloat16x4_t test_bfcvtn (float32x4_t a) > +{ > + return vcvt_bf16_f32 (a); > +} > + > +/* > +**test_bfcvtnq: > +** ... > +** bfcvtn v[0-9]+.4h, v[0-9]+.4s > +** ... > +*/ > +bfloat16x8_t test_bfcvtnq (float32x4_t a) > +{ > + return vcvtq_low_bf16_f32 (a); > +} > + > +/* > +**test_bfcvtnq2: > +** ... > +** bfcvtn v[0-9]+.4h, v[0-9]+.4s > +** ... > +*/ > +bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a) > +{ > + return vcvtq_high_bf16_f32 (inactive, a); > +} > + > +/* > +**test_bfcvt: > +** ... > +** bfcvt h[0-9]+, s[0-9]+ > +** ... > +*/ > +bfloat16_t test_bfcvt (float32_t a) > +{ > + return vcvth_bf16_f32 (a); > +} > + > +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ > +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ > +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ > +/* { dg-final { scan-assembler {bfcvt\th[0-9]+, s[0-9]+} } } */ Same comments as for the BFMMLA/BFMLA[BT] tests. As well as testing all these combinations for the SIMD case, it would be good to have a direct arm_bf16.h-only test for: #pragma GCC target "arch=armv8.2-a+bf16+nosimd" test_bfcvt should still work in that case. It would also be good to have a test that test_bfcvt reports an appropriate error if compiled after: #pragma GCC target "arch=armv8.2-a+nobf16" Thanks, Richard
Sorry for the confusion, what I meant to say was: This patch adds the Armv8.6-a ACLE intrinsics for bfcvtn, bfcvtn2 and bfcvt as part of the BFloat16 extension. (https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics) The intrinsics are declared in arm_bf16.h and arm_neon.h and the RTL patterns are defined in aarch64-simd.md. Tested for regression on aarch64-none-elf and aarch64_be-none-elf. I don't have commit rights, so if this is ok can someone please commit it for me? Here is the updated patch. Thank you, Delia gcc/ChangeLog: 2019-11-06 Delia Burduv <delia.burduv@arm.com> * config/aarch64/aarch64-simd-builtins.def (bfcvtn): New built-in function. (bfcvtn_q): New built-in function. (bfcvtn2): New built-in function. (bfcvt): New built-in function. * config/aarch64/aarch64-simd.md (aarch64_bfcvtn<q><mode>): New pattern. (aarch64_bfcvtn2v8bf): New pattern. (aarch64_bfcvtbf): New pattern. * config/aarch64/arm_bf16.h (float32_t): New typedef. (vcvth_bf16_f32): New intrinsic. * config/aarch64/arm_bf16.h (vcvt_bf16_f32): New intrinsic. (vcvtq_low_bf16_f32): New intrinsic. (vcvtq_high_bf16_f32): New intrinsic. * config/aarch64/iterators.md (V4SF_TO_BF): New mode iterator. (UNSPEC_BFCVTN): New UNSPEC. (UNSPEC_BFCVTN2): New UNSPEC. (UNSPEC_BFCVT): New UNSPEC. * config/arm/types.md (bf_cvt): New type. gcc/testsuite/ChangeLog: 2020-01-31 Delia Burduv <delia.burduv@arm.com> * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: New test. On 12/23/19 6:30 PM, Richard Sandiford wrote: > Some of the comments on the BFMMLA/BFMLA[LT] patch apply here too. > > Delia Burduv <Delia.Burduv@arm.com> writes: >> This patch adds the Armv8.6-a ACLE intrinsics for bfmmla, bfmlalb and >> bfmlalt as part of the BFloat16 extension. > > That's the other patch :-) > >> [...] >> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md >> index 55660ae248f4fa75d35ba2949cd4b9d5139ff5f5..ff7a1f5f34a19b05eba48dba96c736dfdfdf7bac 100644 >> --- a/gcc/config/aarch64/aarch64-simd.md >> +++ b/gcc/config/aarch64/aarch64-simd.md >> @@ -7027,3 +7027,32 @@ >> "xtn\t%0.<Vntype>, %1.<Vtype>" >> [(set_attr "type" "neon_shift_imm_narrow_q")] >> ) >> + >> +;; bfcvtn >> +(define_insn "aarch64_bfcvtn<q><mode>" >> + [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w") >> + (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")] >> + UNSPEC_BFCVTN))] >> + "TARGET_BF16_SIMD" >> + "bfcvtn\\t%0.4h, %1.4s" >> + [(set_attr "type" "f_cvt")] >> +) >> + > > If I've understood the naming convention correctly, the closest type > seems to be "neon_fp_cvt_narrow_s_q". > >> +(define_insn "aarch64_bfcvtn2v8bf" >> + [(set (match_operand:V8BF 0 "register_operand" "=w") >> + (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "w") >> + (match_operand:V4SF 2 "register_operand" "w")] >> + UNSPEC_BFCVTN2))] >> + "TARGET_BF16_SIMD" >> + "bfcvtn2\\t%0.8h, %2.4s" >> + [(set_attr "type" "f_cvt")] >> +) > > Same here. > > The constraint on operand 1 needs to be "0", otherwise operands 1 and 0 > could end up in different registers. You could test for this using > something like: > > bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive, > float32x4_t a) > { > return vcvtq_high_bf16_f32 (inactive, a); > } > > which when compiled at -O should produce something like: > > /* > **test_bfcvtnq2_untied: > ** mov v0\.8h, v1\.8h > ** bfcvtn2 v0\.8h, v2\.4s > ** ret > */ > > (Completely untested, the code above is probably wrong.) > >> + >> +(define_insn "aarch64_bfcvtbf" >> + [(set (match_operand:BF 0 "register_operand" "=w") >> + (unspec:BF [(match_operand:SF 1 "register_operand" "w")] >> + UNSPEC_BFCVT))] >> + "TARGET_BF16_SIMD" > > I think this just needs the scalar macro rather than *_SIMD. > >> + "bfcvt\\t%h0, %s1" >> + [(set_attr "type" "f_cvt")] >> +) >> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h >> index aedb0972735ce549fac1870bacd1ef3101e8fd26..1b9ab3690d35e153cd4f24b9e3bbb5b4cc4b4f4d 100644 >> --- a/gcc/config/aarch64/arm_bf16.h >> +++ b/gcc/config/aarch64/arm_bf16.h >> @@ -34,7 +34,15 @@ >> #ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC >> >> typedef __bf16 bfloat16_t; >> - >> +typedef float float32_t; >> + >> +__extension__ extern __inline bfloat16_t >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >> +vcvth_bf16_f32 \ >> + (float32_t __a) > > No need for the line break here. > >> +{ >> + return __builtin_aarch64_bfcvtbf (__a); >> +} >> >> #endif >> #pragma GCC pop_options >> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h >> index 6cdbf381f0156ed993f03b847228b36ebbdd14f8..120f4b7d8827aee51834e75aeaa6ab8f8451980e 100644 >> --- a/gcc/config/aarch64/arm_neon.h >> +++ b/gcc/config/aarch64/arm_neon.h >> @@ -34610,6 +34610,35 @@ vrnd64xq_f64 (float64x2_t __a) >> >> #include "arm_bf16.h" >> >> +#pragma GCC push_options >> +#pragma GCC target ("arch=armv8.2-a+bf16") >> +#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC >> + >> +__extension__ extern __inline bfloat16x4_t >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >> +vcvt_bf16_f32 (float32x4_t __a) >> +{ >> + return __builtin_aarch64_bfcvtnv4bf (__a); >> + >> +} > > Nit: extra blank line. > >> + >> +__extension__ extern __inline bfloat16x8_t >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >> +vcvtq_low_bf16_f32 (float32x4_t __a) >> +{ >> + return __builtin_aarch64_bfcvtn_qv8bf (__a); >> +} >> + >> +__extension__ extern __inline bfloat16x8_t >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >> +vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a) >> +{ >> + return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a); >> +} >> + >> +#endif >> +#pragma GCC pop_options >> + >> #pragma GCC pop_options >> >> #undef __aarch64_vget_lane_any >> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md >> index 931166da5e47302afe810498eea9c8c2ab89b9de..f9f0bafb1eca4da42e564224fca1fd43d89f6ed1 100644 >> --- a/gcc/config/aarch64/iterators.md >> +++ b/gcc/config/aarch64/iterators.md >> @@ -431,6 +431,9 @@ >> ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements. >> (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI]) >> >> +;; Bfloat16 modes to which V4SF can be converted >> +(define_mode_iterator V4SF_TO_BF [V4BF V8BF]) >> + >> ;; ------------------------------------------------------------------ >> ;; Unspec enumerations for Advance SIMD. These could well go into >> ;; aarch64.md but for their use in int_iterators here. >> @@ -673,6 +676,9 @@ >> UNSPEC_UMULHS ; Used in aarch64-sve2.md. >> UNSPEC_UMULHRS ; Used in aarch64-sve2.md. >> UNSPEC_ASRD ; Used in aarch64-sve.md. >> + UNSPEC_BFCVTN ; Used in aarch64-simd.md. >> + UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. >> + UNSPEC_BFCVT ; Used in aarch64-simd.md. >> ]) >> >> ;; ------------------------------------------------------------------ >> diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md >> index df39522f2ad63a52c910b1a6bcc7aa13aaf5d021..dbcb4d58798d7f51b1b8310cd446c58317d7b50d 100644 >> --- a/gcc/config/arm/types.md >> +++ b/gcc/config/arm/types.md >> @@ -1097,7 +1097,8 @@ >> crypto_sm4,\ >> coproc,\ >> tme,\ >> - memtag" >> + memtag,\ >> + bf_cvt" > > This doesn't seem to be used. > >> diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c >> new file mode 100644 >> index 0000000000000000000000000000000000000000..ebe5b578c1fa82a6f2a166d55c7dc7e905b87135 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c >> @@ -0,0 +1,56 @@ >> +/* { dg-do assemble { target { aarch64*-*-* } } } */ >> +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ >> +/* { dg-add-options arm_v8_2a_bf16_neon } */ >> +/* { dg-additional-options "-save-temps" } */ >> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ >> + >> +#include <arm_neon.h> >> + >> +/* >> +**test_bfcvtn: >> +** ... >> +** bfcvtn\tv[0-9]+.4h, v[0-9]+.4s >> +** ... >> +*/ >> +bfloat16x4_t test_bfcvtn (float32x4_t a) >> +{ >> + return vcvt_bf16_f32 (a); >> +} >> + >> +/* >> +**test_bfcvtnq: >> +** ... >> +** bfcvtn v[0-9]+.4h, v[0-9]+.4s >> +** ... >> +*/ >> +bfloat16x8_t test_bfcvtnq (float32x4_t a) >> +{ >> + return vcvtq_low_bf16_f32 (a); >> +} >> + >> +/* >> +**test_bfcvtnq2: >> +** ... >> +** bfcvtn v[0-9]+.4h, v[0-9]+.4s >> +** ... >> +*/ >> +bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a) >> +{ >> + return vcvtq_high_bf16_f32 (inactive, a); >> +} >> + >> +/* >> +**test_bfcvt: >> +** ... >> +** bfcvt h[0-9]+, s[0-9]+ >> +** ... >> +*/ >> +bfloat16_t test_bfcvt (float32_t a) >> +{ >> + return vcvth_bf16_f32 (a); >> +} >> + >> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ >> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ >> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ >> +/* { dg-final { scan-assembler {bfcvt\th[0-9]+, s[0-9]+} } } */ > > Same comments as for the BFMMLA/BFMLA[BT] tests. > > As well as testing all these combinations for the SIMD case, > it would be good to have a direct arm_bf16.h-only test for: > > #pragma GCC target "arch=armv8.2-a+bf16+nosimd" > > test_bfcvt should still work in that case. > > It would also be good to have a test that test_bfcvt reports > an appropriate error if compiled after: > > #pragma GCC target "arch=armv8.2-a+nobf16" > > Thanks, > Richard >
Delia Burduv <Delia.Burduv@arm.com> writes: > [...] > diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h > index 3759c0d1cb449a7f0125cc2a1433127564d66622..fb2150e1d60a590046e2c034422021aafc721e23 100644 > --- a/gcc/config/aarch64/arm_bf16.h > +++ b/gcc/config/aarch64/arm_bf16.h > @@ -28,5 +28,13 @@ > #define _AARCH64_BF16_H_ > > typedef __bf16 bfloat16_t; > +typedef float float32_t; > + > +__extension__ extern __inline bfloat16_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vcvth_bf16_f32 (float32_t __a) > +{ > + return __builtin_aarch64_bfcvtbf (__a); > +} Sorry for not noticing last time, but this should be wrapped in: #pragma GCC push_options #pragma GCC target ("+nothing+bf16") ... #pragma GCC pop_options otherwise I think calling this function without +bf16 would trigger an internal compiler error. It would be good to have a test that that doesn't happen (something along the lines of bfcvt-nobf16.c, but for the scalar case). > [...] > diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c > new file mode 100644 > index 0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82fd8ed6f9f247c1a9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c > @@ -0,0 +1,48 @@ > +/* { dg-do assemble { target { aarch64*-*-* } } } */ The { target ... } isn't necessary here. (Missed that in the other review, sorry.) > +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ > +/* { dg-add-options arm_v8_2a_bf16_neon } */ > +/* { dg-additional-options "-save-temps" } */ > +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ > +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ > + > +#include <arm_neon.h> > + > +/* > +**test_bfcvtn: > +** bfcvtn\tv0.4h, v0.4s Like with the other review, I think the literal tab you had in the original patch looks better than \t. > [...] > diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c > new file mode 100644 > index 0000000000000000000000000000000000000000..8d7dffe16275de60e884c449afa0fea0b1af6081 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c > @@ -0,0 +1,15 @@ > +/* { dg-do assemble { target { aarch64*-*-* } } } */ This needs: /* { dg-require-effective-target aarch64_asm_bf16_ok } */ (Doesn't exist yet, but I hope to post a patch soon.) Looks good otherwise, thanks. Richard
Hi Richard, > 0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82f > d8e > > d6f9f247c1a9 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compil > > +++ e.c > > @@ -0,0 +1,48 @@ > > +/* { dg-do assemble { target { aarch64*-*-* } } } */ > > The { target ... } isn't necessary here. (Missed that in the other review, sorry.) > Why not? The advsimd-intrinsics tests are shared between both AArch32 and AArch64. Tamar. > > +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ > > +/* { dg-add-options arm_v8_2a_bf16_neon } */ > > +/* { dg-additional-options "-save-temps" } */ > > +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ > > +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ > > + > > +#include <arm_neon.h> > > + > > +/* > > +**test_bfcvtn: > > +** bfcvtn\tv0.4h, v0.4s > > Like with the other review, I think the literal tab you had in the original patch > looks better than \t. > > > [...] > > diff --git > > a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c > > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c > > new file mode 100644 > > index > > > 0000000000000000000000000000000000000000..8d7dffe16275de60e884c449af > a0 > > fea0b1af6081 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd > > +++ .c > > @@ -0,0 +1,15 @@ > > +/* { dg-do assemble { target { aarch64*-*-* } } } */ > > This needs: > > /* { dg-require-effective-target aarch64_asm_bf16_ok } */ > > (Doesn't exist yet, but I hope to post a patch soon.) > > Looks good otherwise, thanks. > > Richard
Tamar Christina <Tamar.Christina@arm.com> writes: > Hi Richard, > >> 0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82f >> d8e >> > d6f9f247c1a9 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compil >> > +++ e.c >> > @@ -0,0 +1,48 @@ >> > +/* { dg-do assemble { target { aarch64*-*-* } } } */ >> >> The { target ... } isn't necessary here. (Missed that in the other review, sorry.) >> > > Why not? The advsimd-intrinsics tests are shared between both AArch32 and AArch64. Ah, so they are. Think it would better to move them to a new gcc.target/arm-common or something in that case. Tests in gcc.target/aarch64 really ought to be specific to aarch64. Thanks, Richard > > Tamar. > >> > +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ >> > +/* { dg-add-options arm_v8_2a_bf16_neon } */ >> > +/* { dg-additional-options "-save-temps" } */ >> > +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ >> > +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ >> > + >> > +#include <arm_neon.h> >> > + >> > +/* >> > +**test_bfcvtn: >> > +** bfcvtn\tv0.4h, v0.4s >> >> Like with the other review, I think the literal tab you had in the original patch >> looks better than \t. >> >> > [...] >> > diff --git >> > a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c >> > b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c >> > new file mode 100644 >> > index >> > >> 0000000000000000000000000000000000000000..8d7dffe16275de60e884c449af >> a0 >> > fea0b1af6081 >> > --- /dev/null >> > +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd >> > +++ .c >> > @@ -0,0 +1,15 @@ >> > +/* { dg-do assemble { target { aarch64*-*-* } } } */ >> >> This needs: >> >> /* { dg-require-effective-target aarch64_asm_bf16_ok } */ >> >> (Doesn't exist yet, but I hope to post a patch soon.) >> >> Looks good otherwise, thanks. >> >> Richard
Hi, Here is the latest version of the patch. On 2/18/20 1:51 PM, Richard Sandiford wrote: > Tamar Christina <Tamar.Christina@arm.com> writes: >> Hi Richard, >> >>> 0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82f >>> d8e >>>> d6f9f247c1a9 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compil >>>> +++ e.c >>>> @@ -0,0 +1,48 @@ >>>> +/* { dg-do assemble { target { aarch64*-*-* } } } */ >>> >>> The { target ... } isn't necessary here. (Missed that in the other review, sorry.) >>> >> >> Why not? The advsimd-intrinsics tests are shared between both AArch32 and AArch64. > > Ah, so they are. Think it would better to move them to a new > gcc.target/arm-common or something in that case. Tests in > gcc.target/aarch64 really ought to be specific to aarch64. > > Thanks, > Richard > I left the advsimd-intrinsics tests shared since creating a new gcc.target/arm-common should probably be a separate patch. Let me know if this patch is ok. And if it is, can someone please commit it for me? Thanks, Delia >> >> Tamar. >> >>>> +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ >>>> +/* { dg-add-options arm_v8_2a_bf16_neon } */ >>>> +/* { dg-additional-options "-save-temps" } */ >>>> +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ >>>> +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ >>>> + >>>> +#include <arm_neon.h> >>>> + >>>> +/* >>>> +**test_bfcvtn: >>>> +** bfcvtn\tv0.4h, v0.4s >>> >>> Like with the other review, I think the literal tab you had in the original patch >>> looks better than \t. >>> >>>> [...] >>>> diff --git >>>> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c >>>> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c >>>> new file mode 100644 >>>> index >>>> >>> 0000000000000000000000000000000000000000..8d7dffe16275de60e884c449af >>> a0 >>>> fea0b1af6081 >>>> --- /dev/null >>>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd >>>> +++ .c >>>> @@ -0,0 +1,15 @@ >>>> +/* { dg-do assemble { target { aarch64*-*-* } } } */ >>> >>> This needs: >>> >>> /* { dg-require-effective-target aarch64_asm_bf16_ok } */ >>> >>> (Doesn't exist yet, but I hope to post a patch soon.) >>> >>> Looks good otherwise, thanks. >>> >>> Richard
Hi, Thanks for the update and sorry for the slow reply. When I try the patch locally I get: FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O0 (test for excess errors) FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O1 (test for excess errors) FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O2 (test for excess errors) FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O2 -flto -fno-use-linker-plugin -flto-partition=none (test for excess errors) FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects (test for excess errors) FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O3 -g (test for excess errors) FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -Og -g (test for excess errors) FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -Os (test for excess errors) I think that's because: Delia Burduv <delia.burduv@arm.com> writes: > diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h > index 3759c0d1cb449a7f0125cc2a1433127564d66622..fa7080c2953bc3254f01d842a8afef917d469080 100644 > --- a/gcc/config/aarch64/arm_bf16.h > +++ b/gcc/config/aarch64/arm_bf16.h > @@ -27,6 +27,19 @@ > #ifndef _AARCH64_BF16_H_ > #define _AARCH64_BF16_H_ > > +#pragma GCC push_options > +#pragma GCC target ("+nothing+bf16") > + > typedef __bf16 bfloat16_t; > +typedef float float32_t; > + > +__extension__ extern __inline bfloat16_t > +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > +vcvth_bf16_f32 (float32_t __a) > +{ > + return __builtin_aarch64_bfcvtbf (__a); > +} > + > +#pragma GCC pop_options "+bf16" implicitly enables "+simd", so functions guarded with "+nothing+bf16" are only available when "+simd" is available. I think we want "+nothing+bf16+nosimd" instead. (Haven't tested that though.) Very minor, but: it might be clearer to leave the typedefs outside of the #pragma block. It doesn't make any difference to the behaviour, but it emphasises that the typedefs really are available unconditionally. Looks ready to go otherwise. Thanks, Richard
Hi, Here is the latest version of the patch. That test should now work. Thanks, Delia On 3/5/20 11:06 AM, Richard Sandiford wrote: > Hi, > > Thanks for the update and sorry for the slow reply. > > When I try the patch locally I get: > > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O0 (test for excess errors) > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O1 (test for excess errors) > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O2 (test for excess errors) > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O2 -flto -fno-use-linker-plugin -flto-partition=none (test for excess errors) > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects (test for excess errors) > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -O3 -g (test for excess errors) > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -Og -g (test for excess errors) > FAIL: gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -Os (test for excess errors) > > I think that's because: > > Delia Burduv <delia.burduv@arm.com> writes: >> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h >> index 3759c0d1cb449a7f0125cc2a1433127564d66622..fa7080c2953bc3254f01d842a8afef917d469080 100644 >> --- a/gcc/config/aarch64/arm_bf16.h >> +++ b/gcc/config/aarch64/arm_bf16.h >> @@ -27,6 +27,19 @@ >> #ifndef _AARCH64_BF16_H_ >> #define _AARCH64_BF16_H_ >> >> +#pragma GCC push_options >> +#pragma GCC target ("+nothing+bf16") >> + >> typedef __bf16 bfloat16_t; >> +typedef float float32_t; >> + >> +__extension__ extern __inline bfloat16_t >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >> +vcvth_bf16_f32 (float32_t __a) >> +{ >> + return __builtin_aarch64_bfcvtbf (__a); >> +} >> + >> +#pragma GCC pop_options > > "+bf16" implicitly enables "+simd", so functions guarded with > "+nothing+bf16" are only available when "+simd" is available. > I think we want "+nothing+bf16+nosimd" instead. (Haven't tested > that though.) > > Very minor, but: it might be clearer to leave the typedefs outside > of the #pragma block. It doesn't make any difference to the behaviour, > but it emphasises that the typedefs really are available unconditionally. > > Looks ready to go otherwise. > > Thanks, > Richard >
Delia Burduv <delia.burduv@arm.com> writes: > Hi, > > Here is the latest version of the patch. That test should now work. Thanks, pushed. Richard
Hi, I think this commit causes a failure on aarch64-none-elf due to a DejaGnu typo in gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c. { dg-final { check-function-bodies "**" "" "-O[^0]" } } I think the square brackets need to be escaped or use {-O[^0]}. Regards Vasee Fri, Mar 06, 2020 at 09:54:07AM +0000, Richard Sandiford wrote: > Delia Burduv <delia.burduv@arm.com> writes: > > Hi, > > > > Here is the latest version of the patch. That test should now work. > > Thanks, pushed. > > Richard
Vasee Vinayagamoorthy <vaseeharan.vinayagamoorthy@arm.com> writes: > Hi, > > I think this commit causes a failure on aarch64-none-elf due to a DejaGnu > typo in gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c. > > { dg-final { check-function-bodies "**" "" "-O[^0]" } } > > I think the square brackets need to be escaped or use {-O[^0]}. Gah, this is my fault. I'd noticed that the line previously used "-DCHECK_ASM", which meant that the test would never be performed (because nothing ever adds -DCHECK_ASM for this test harness). I should have noticed that during previous review cycles and didn't want to force another review round over it, so I made the change locally. But then I compounded the mistake by messing up the testing somehow, and also forgetting about the change when doing the commit. :-( Really sorry about that, and sorry especially to Delia for messing up her patch. It sounds from off-list discussion like you have a fix in the works, so I'll defer to that. Thanks, Richard > > Regards > Vasee > > Fri, Mar 06, 2020 at 09:54:07AM +0000, Richard Sandiford wrote: >> Delia Burduv <delia.burduv@arm.com> writes: >> > Hi, >> > >> > Here is the latest version of the patch. That test should now work. >> >> Thanks, pushed. >> >> Richard
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index f4ca35a59704c761fe2ac2b6d401fff7c8aba80d..30a425bd3aec121e78f269f44e188bdb8d39e75f 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -682,3 +682,9 @@ BUILTIN_VSFDF (UNOP, frint32x, 0) BUILTIN_VSFDF (UNOP, frint64z, 0) BUILTIN_VSFDF (UNOP, frint64x, 0) + + /* Implemented by aarch64_bfcvtn{q}{2}<mode> */ + VAR1 (UNOP, bfcvtn, 0, v4bf) + VAR1 (UNOP, bfcvtn_q, 0, v8bf) + VAR1 (BINOP, bfcvtn2, 0, v8bf) + VAR1 (UNOP, bfcvt, 0, bf) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 55660ae248f4fa75d35ba2949cd4b9d5139ff5f5..ff7a1f5f34a19b05eba48dba96c736dfdfdf7bac 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7027,3 +7027,32 @@ "xtn\t%0.<Vntype>, %1.<Vtype>" [(set_attr "type" "neon_shift_imm_narrow_q")] ) + +;; bfcvtn +(define_insn "aarch64_bfcvtn<q><mode>" + [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w") + (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")] + UNSPEC_BFCVTN))] + "TARGET_BF16_SIMD" + "bfcvtn\\t%0.4h, %1.4s" + [(set_attr "type" "f_cvt")] +) + +(define_insn "aarch64_bfcvtn2v8bf" + [(set (match_operand:V8BF 0 "register_operand" "=w") + (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "w") + (match_operand:V4SF 2 "register_operand" "w")] + UNSPEC_BFCVTN2))] + "TARGET_BF16_SIMD" + "bfcvtn2\\t%0.8h, %2.4s" + [(set_attr "type" "f_cvt")] +) + +(define_insn "aarch64_bfcvtbf" + [(set (match_operand:BF 0 "register_operand" "=w") + (unspec:BF [(match_operand:SF 1 "register_operand" "w")] + UNSPEC_BFCVT))] + "TARGET_BF16_SIMD" + "bfcvt\\t%h0, %s1" + [(set_attr "type" "f_cvt")] +) diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h index aedb0972735ce549fac1870bacd1ef3101e8fd26..1b9ab3690d35e153cd4f24b9e3bbb5b4cc4b4f4d 100644 --- a/gcc/config/aarch64/arm_bf16.h +++ b/gcc/config/aarch64/arm_bf16.h @@ -34,7 +34,15 @@ #ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC typedef __bf16 bfloat16_t; - +typedef float float32_t; + +__extension__ extern __inline bfloat16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvth_bf16_f32 \ + (float32_t __a) +{ + return __builtin_aarch64_bfcvtbf (__a); +} #endif #pragma GCC pop_options diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 6cdbf381f0156ed993f03b847228b36ebbdd14f8..120f4b7d8827aee51834e75aeaa6ab8f8451980e 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -34610,6 +34610,35 @@ vrnd64xq_f64 (float64x2_t __a) #include "arm_bf16.h" +#pragma GCC push_options +#pragma GCC target ("arch=armv8.2-a+bf16") +#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC + +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvt_bf16_f32 (float32x4_t __a) +{ + return __builtin_aarch64_bfcvtnv4bf (__a); + +} + +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtq_low_bf16_f32 (float32x4_t __a) +{ + return __builtin_aarch64_bfcvtn_qv8bf (__a); +} + +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a) +{ + return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a); +} + +#endif +#pragma GCC pop_options + #pragma GCC pop_options #undef __aarch64_vget_lane_any diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 931166da5e47302afe810498eea9c8c2ab89b9de..f9f0bafb1eca4da42e564224fca1fd43d89f6ed1 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -431,6 +431,9 @@ ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements. (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI]) +;; Bfloat16 modes to which V4SF can be converted +(define_mode_iterator V4SF_TO_BF [V4BF V8BF]) + ;; ------------------------------------------------------------------ ;; Unspec enumerations for Advance SIMD. These could well go into ;; aarch64.md but for their use in int_iterators here. @@ -673,6 +676,9 @@ UNSPEC_UMULHS ; Used in aarch64-sve2.md. UNSPEC_UMULHRS ; Used in aarch64-sve2.md. UNSPEC_ASRD ; Used in aarch64-sve.md. + UNSPEC_BFCVTN ; Used in aarch64-simd.md. + UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. + UNSPEC_BFCVT ; Used in aarch64-simd.md. ]) ;; ------------------------------------------------------------------ diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md index df39522f2ad63a52c910b1a6bcc7aa13aaf5d021..dbcb4d58798d7f51b1b8310cd446c58317d7b50d 100644 --- a/gcc/config/arm/types.md +++ b/gcc/config/arm/types.md @@ -1097,7 +1097,8 @@ crypto_sm4,\ coproc,\ tme,\ - memtag" + memtag,\ + bf_cvt" (const_string "untyped")) ; Is this an (integer side) multiply with a 32-bit (or smaller) result? diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c new file mode 100644 index 0000000000000000000000000000000000000000..ebe5b578c1fa82a6f2a166d55c7dc7e905b87135 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c @@ -0,0 +1,56 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include <arm_neon.h> + +/* +**test_bfcvtn: +** ... +** bfcvtn\tv[0-9]+.4h, v[0-9]+.4s +** ... +*/ +bfloat16x4_t test_bfcvtn (float32x4_t a) +{ + return vcvt_bf16_f32 (a); +} + +/* +**test_bfcvtnq: +** ... +** bfcvtn v[0-9]+.4h, v[0-9]+.4s +** ... +*/ +bfloat16x8_t test_bfcvtnq (float32x4_t a) +{ + return vcvtq_low_bf16_f32 (a); +} + +/* +**test_bfcvtnq2: +** ... +** bfcvtn v[0-9]+.4h, v[0-9]+.4s +** ... +*/ +bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a) +{ + return vcvtq_high_bf16_f32 (inactive, a); +} + +/* +**test_bfcvt: +** ... +** bfcvt h[0-9]+, s[0-9]+ +** ... +*/ +bfloat16_t test_bfcvt (float32_t a) +{ + return vcvth_bf16_f32 (a); +} + +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ +/* { dg-final { scan-assembler {bfcvt\th[0-9]+, s[0-9]+} } } */