Message ID | 20240522020545.20597-1-quic_pzheng@quicinc.com |
---|---|
State | New |
Headers | show |
Series | aarch64: Fold vget_high_* intrinsics to BIT_FIELD_REF [PR102171] | expand |
Pengxuan Zheng <quic_pzheng@quicinc.com> writes: > This patch is a follow-up of r15-697-ga2e4fe5a53cf75 to also fold vget_high_* > intrinsics to BIT_FILED_REF and remove the vget_high_* definitions from > arm_neon.h to use the new intrinsics framework. > > PR target/102171 > > gcc/ChangeLog: > > * config/aarch64/aarch64-builtins.cc (AARCH64_SIMD_VGET_HIGH_BUILTINS): > New macro to create definitions for all vget_high intrinsics. > (VGET_HIGH_BUILTIN): Likewise. > (enum aarch64_builtins): Add vget_high function codes. > (AARCH64_SIMD_VGET_LOW_BUILTINS): Delete duplicate macro. > (aarch64_general_fold_builtin): Fold vget_high calls. > * config/aarch64/aarch64-simd-builtins.def: Delete vget_high builtins. > * config/aarch64/aarch64-simd.md (aarch64_get_high<mode>): Delete. > (aarch64_vget_hi_halfv8bf): Likewise. > * config/aarch64/arm_neon.h (__attribute__): Delete. > (vget_high_f16): Likewise. > (vget_high_f32): Likewise. > (vget_high_f64): Likewise. > (vget_high_p8): Likewise. > (vget_high_p16): Likewise. > (vget_high_p64): Likewise. > (vget_high_s8): Likewise. > (vget_high_s16): Likewise. > (vget_high_s32): Likewise. > (vget_high_s64): Likewise. > (vget_high_u8): Likewise. > (vget_high_u16): Likewise. > (vget_high_u32): Likewise. > (vget_high_u64): Likewise. > (vget_high_bf16): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/vget_high_2.c: New test. > * gcc.target/aarch64/vget_high_2_be.c: New test. OK, thanks. Richard > Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com> > --- > gcc/config/aarch64/aarch64-builtins.cc | 59 +++++++--- > gcc/config/aarch64/aarch64-simd-builtins.def | 6 - > gcc/config/aarch64/aarch64-simd.md | 22 ---- > gcc/config/aarch64/arm_neon.h | 105 ------------------ > .../gcc.target/aarch64/vget_high_2.c | 30 +++++ > .../gcc.target/aarch64/vget_high_2_be.c | 31 ++++++ > 6 files changed, 104 insertions(+), 149 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c > > diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc > index 11b888016ed..f8eeccb554d 100644 > --- a/gcc/config/aarch64/aarch64-builtins.cc > +++ b/gcc/config/aarch64/aarch64-builtins.cc > @@ -675,6 +675,23 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { > VGET_LOW_BUILTIN(u64) \ > VGET_LOW_BUILTIN(bf16) > > +#define AARCH64_SIMD_VGET_HIGH_BUILTINS \ > + VGET_HIGH_BUILTIN(f16) \ > + VGET_HIGH_BUILTIN(f32) \ > + VGET_HIGH_BUILTIN(f64) \ > + VGET_HIGH_BUILTIN(p8) \ > + VGET_HIGH_BUILTIN(p16) \ > + VGET_HIGH_BUILTIN(p64) \ > + VGET_HIGH_BUILTIN(s8) \ > + VGET_HIGH_BUILTIN(s16) \ > + VGET_HIGH_BUILTIN(s32) \ > + VGET_HIGH_BUILTIN(s64) \ > + VGET_HIGH_BUILTIN(u8) \ > + VGET_HIGH_BUILTIN(u16) \ > + VGET_HIGH_BUILTIN(u32) \ > + VGET_HIGH_BUILTIN(u64) \ > + VGET_HIGH_BUILTIN(bf16) > + > typedef struct > { > const char *name; > @@ -717,6 +734,9 @@ typedef struct > #define VGET_LOW_BUILTIN(A) \ > AARCH64_SIMD_BUILTIN_VGET_LOW_##A, > > +#define VGET_HIGH_BUILTIN(A) \ > + AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, > + > #undef VAR1 > #define VAR1(T, N, MAP, FLAG, A) \ > AARCH64_SIMD_BUILTIN_##T##_##N##A, > @@ -753,6 +773,7 @@ enum aarch64_builtins > /* SIMD intrinsic builtins. */ > AARCH64_SIMD_VREINTERPRET_BUILTINS > AARCH64_SIMD_VGET_LOW_BUILTINS > + AARCH64_SIMD_VGET_HIGH_BUILTINS > /* ARMv8.3-A Pointer Authentication Builtins. */ > AARCH64_PAUTH_BUILTIN_AUTIA1716, > AARCH64_PAUTH_BUILTIN_PACIA1716, > @@ -855,26 +876,21 @@ static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = { > false \ > }, > > -#define AARCH64_SIMD_VGET_LOW_BUILTINS \ > - VGET_LOW_BUILTIN(f16) \ > - VGET_LOW_BUILTIN(f32) \ > - VGET_LOW_BUILTIN(f64) \ > - VGET_LOW_BUILTIN(p8) \ > - VGET_LOW_BUILTIN(p16) \ > - VGET_LOW_BUILTIN(p64) \ > - VGET_LOW_BUILTIN(s8) \ > - VGET_LOW_BUILTIN(s16) \ > - VGET_LOW_BUILTIN(s32) \ > - VGET_LOW_BUILTIN(s64) \ > - VGET_LOW_BUILTIN(u8) \ > - VGET_LOW_BUILTIN(u16) \ > - VGET_LOW_BUILTIN(u32) \ > - VGET_LOW_BUILTIN(u64) \ > - VGET_LOW_BUILTIN(bf16) > +#undef VGET_HIGH_BUILTIN > +#define VGET_HIGH_BUILTIN(A) \ > + {"vget_high_" #A, \ > + AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \ > + 2, \ > + { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \ > + { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \ > + FLAG_AUTO_FP, \ > + false \ > + }, > > static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = { > AARCH64_SIMD_VREINTERPRET_BUILTINS > AARCH64_SIMD_VGET_LOW_BUILTINS > + AARCH64_SIMD_VGET_HIGH_BUILTINS > }; > > > @@ -3270,6 +3286,10 @@ aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2) > #define VGET_LOW_BUILTIN(A) \ > case AARCH64_SIMD_BUILTIN_VGET_LOW_##A: > > +#undef VGET_HIGH_BUILTIN > +#define VGET_HIGH_BUILTIN(A) \ > + case AARCH64_SIMD_BUILTIN_VGET_HIGH_##A: > + > /* Try to fold a call to the built-in function with subcode FCODE. The > function is passed the N_ARGS arguments in ARGS and it returns a value > of type TYPE. Return the new expression on success and NULL_TREE on > @@ -3292,6 +3312,13 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type, > { > auto pos = BYTES_BIG_ENDIAN ? 64 : 0; > > + return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64), > + bitsize_int (pos)); > + } > + AARCH64_SIMD_VGET_HIGH_BUILTINS > + { > + auto pos = BYTES_BIG_ENDIAN ? 0 : 64; > + > return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64), > bitsize_int (pos)); > } > diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def > index a9f0558f8b6..e65f73d7ba2 100644 > --- a/gcc/config/aarch64/aarch64-simd-builtins.def > +++ b/gcc/config/aarch64/aarch64-simd-builtins.def > @@ -65,9 +65,6 @@ > BUILTIN_VS (UNOP, ctz, 2, NONE) > BUILTIN_VB (UNOP, popcount, 2, NONE) > > - /* Implemented by aarch64_get_high<mode>. */ > - BUILTIN_VQMOV (UNOP, get_high, 0, AUTO_FP) > - > /* Implemented by aarch64_<sur>q<r>shl<mode>. */ > BUILTIN_VSDQ_I (BINOP, sqshl, 0, NONE) > BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0, NONE) > @@ -958,9 +955,6 @@ > VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, FP, v4sf) > VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, FP, v4sf) > > - /* Implemented by aarch64_vget_hi_halfv8bf. */ > - VAR1 (UNOP, vget_hi_half, 0, AUTO_FP, v8bf) > - > /* Implemented by aarch64_simd_<sur>mmlav16qi. */ > VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi) > VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi) > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md > index 875ea52b02f..c311888e4bd 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -288,17 +288,6 @@ (define_expand "aarch64_get_half<mode>" > } > ) > > -(define_expand "aarch64_get_high<mode>" > - [(match_operand:<VHALF> 0 "register_operand") > - (match_operand:VQMOV 1 "register_operand")] > - "TARGET_FLOAT" > - { > - rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); > - emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi)); > - DONE; > - } > -) > - > (define_insn_and_split "aarch64_simd_mov_from_<mode>low" > [(set (match_operand:<VHALF> 0 "register_operand") > (vec_select:<VHALF> > @@ -9763,17 +9752,6 @@ (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>" > [(set_attr "type" "neon_dot<VDQSF:q>")] > ) > > -;; vget_high_bf16 > -(define_expand "aarch64_vget_hi_halfv8bf" > - [(match_operand:V4BF 0 "register_operand") > - (match_operand:V8BF 1 "register_operand")] > - "TARGET_BF16_SIMD" > -{ > - rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true); > - emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); > - DONE; > -}) > - > ;; bfmmla > (define_insn "aarch64_bfmmlaqv4sf" > [(set (match_operand:V4SF 0 "register_operand" "=w") > diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h > index 92c2c5361cd..c4a09528ffd 100644 > --- a/gcc/config/aarch64/arm_neon.h > +++ b/gcc/config/aarch64/arm_neon.h > @@ -3027,104 +3027,6 @@ vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index) > return __aarch64_vset_lane_any (__elem, __vec, __index); > } > > -__extension__ extern __inline float16x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_f16 (float16x8_t __a) > -{ > - return __builtin_aarch64_get_highv8hf (__a); > -} > - > -__extension__ extern __inline float32x2_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_f32 (float32x4_t __a) > -{ > - return __builtin_aarch64_get_highv4sf (__a); > -} > - > -__extension__ extern __inline float64x1_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_f64 (float64x2_t __a) > -{ > - return (float64x1_t) {__builtin_aarch64_get_highv2df (__a)}; > -} > - > -__extension__ extern __inline poly8x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_p8 (poly8x16_t __a) > -{ > - return (poly8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a); > -} > - > -__extension__ extern __inline poly16x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_p16 (poly16x8_t __a) > -{ > - return (poly16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a); > -} > - > -__extension__ extern __inline poly64x1_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_p64 (poly64x2_t __a) > -{ > - return (poly64x1_t) __builtin_aarch64_get_highv2di ((int64x2_t) __a); > -} > - > -__extension__ extern __inline int8x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_s8 (int8x16_t __a) > -{ > - return __builtin_aarch64_get_highv16qi (__a); > -} > - > -__extension__ extern __inline int16x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_s16 (int16x8_t __a) > -{ > - return __builtin_aarch64_get_highv8hi (__a); > -} > - > -__extension__ extern __inline int32x2_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_s32 (int32x4_t __a) > -{ > - return __builtin_aarch64_get_highv4si (__a); > -} > - > -__extension__ extern __inline int64x1_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_s64 (int64x2_t __a) > -{ > - return (int64x1_t) {__builtin_aarch64_get_highv2di (__a)}; > -} > - > -__extension__ extern __inline uint8x8_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_u8 (uint8x16_t __a) > -{ > - return (uint8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a); > -} > - > -__extension__ extern __inline uint16x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_u16 (uint16x8_t __a) > -{ > - return (uint16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a); > -} > - > -__extension__ extern __inline uint32x2_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_u32 (uint32x4_t __a) > -{ > - return (uint32x2_t) __builtin_aarch64_get_highv4si ((int32x4_t) __a); > -} > - > -__extension__ extern __inline uint64x1_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_u64 (uint64x2_t __a) > -{ > - return (uint64x1_t) {__builtin_aarch64_get_highv2di ((int64x2_t) __a)}; > -} > - > > __extension__ extern __inline int8x16_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > @@ -28381,13 +28283,6 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, > return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); > } > > -__extension__ extern __inline bfloat16x4_t > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > -vget_high_bf16 (bfloat16x8_t __a) > -{ > - return __builtin_aarch64_vget_hi_halfv8bf (__a); > -} > - > __extension__ extern __inline float32x4_t > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > vcvt_f32_bf16 (bfloat16x4_t __a) > diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c > new file mode 100644 > index 00000000000..9593fb685e3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c > @@ -0,0 +1,30 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -fdump-tree-optimized -mlittle-endian" } */ > + > +#include <arm_neon.h> > + > +#define VARIANTS \ > +VARIANT (uint8x8_t, uint8x16_t, u8) \ > +VARIANT (uint16x4_t, uint16x8_t, u16) \ > +VARIANT (uint32x2_t, uint32x4_t, u32) \ > +VARIANT (uint64x1_t, uint64x2_t, u64) \ > +VARIANT (int8x8_t, int8x16_t, s8) \ > +VARIANT (int16x4_t, int16x8_t, s16) \ > +VARIANT (int32x2_t, int32x4_t, s32) \ > +VARIANT (int64x1_t, int64x2_t, s64) \ > +VARIANT (float16x4_t, float16x8_t, f16) \ > +VARIANT (float32x2_t, float32x4_t, f32) \ > +VARIANT (float64x1_t, float64x2_t, f64) \ > +VARIANT (bfloat16x4_t, bfloat16x8_t, bf16) > + > +/* vget_high_* intrinsics should become BIT_FIELD_REF. */ > +#define VARIANT(TYPE64, TYPE128, SUFFIX) \ > +TYPE64 \ > +test_vget_high_##SUFFIX (TYPE128 vec) \ > +{ \ > + return vget_high_##SUFFIX (vec); \ > +} > + > +VARIANTS > + > +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 64>" 12 "optimized" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c > new file mode 100644 > index 00000000000..5928c3a4597 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c > @@ -0,0 +1,31 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target stdint_types_mbig_endian } */ > +/* { dg-options "-O3 -fdump-tree-optimized -mbig-endian" } */ > + > +#include <arm_neon.h> > + > +#define VARIANTS \ > +VARIANT (uint8x8_t, uint8x16_t, u8) \ > +VARIANT (uint16x4_t, uint16x8_t, u16) \ > +VARIANT (uint32x2_t, uint32x4_t, u32) \ > +VARIANT (uint64x1_t, uint64x2_t, u64) \ > +VARIANT (int8x8_t, int8x16_t, s8) \ > +VARIANT (int16x4_t, int16x8_t, s16) \ > +VARIANT (int32x2_t, int32x4_t, s32) \ > +VARIANT (int64x1_t, int64x2_t, s64) \ > +VARIANT (float16x4_t, float16x8_t, f16) \ > +VARIANT (float32x2_t, float32x4_t, f32) \ > +VARIANT (float64x1_t, float64x2_t, f64) \ > +VARIANT (bfloat16x4_t, bfloat16x8_t, bf16) > + > +/* vget_high_* intrinsics should become BIT_FIELD_REF. */ > +#define VARIANT(TYPE64, TYPE128, SUFFIX) \ > +TYPE64 \ > +test_vget_high_##SUFFIX (TYPE128 vec) \ > +{ \ > + return vget_high_##SUFFIX (vec); \ > +} > + > +VARIANTS > + > +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 0>" 12 "optimized" } } */
On Wed, May 22, 2024 at 5:28 AM Richard Sandiford <richard.sandiford@arm.com> wrote: > > Pengxuan Zheng <quic_pzheng@quicinc.com> writes: > > This patch is a follow-up of r15-697-ga2e4fe5a53cf75 to also fold vget_high_* > > intrinsics to BIT_FILED_REF and remove the vget_high_* definitions from > > arm_neon.h to use the new intrinsics framework. > > > > PR target/102171 > > > > gcc/ChangeLog: > > > > * config/aarch64/aarch64-builtins.cc (AARCH64_SIMD_VGET_HIGH_BUILTINS): > > New macro to create definitions for all vget_high intrinsics. > > (VGET_HIGH_BUILTIN): Likewise. > > (enum aarch64_builtins): Add vget_high function codes. > > (AARCH64_SIMD_VGET_LOW_BUILTINS): Delete duplicate macro. > > (aarch64_general_fold_builtin): Fold vget_high calls. > > * config/aarch64/aarch64-simd-builtins.def: Delete vget_high builtins. > > * config/aarch64/aarch64-simd.md (aarch64_get_high<mode>): Delete. > > (aarch64_vget_hi_halfv8bf): Likewise. > > * config/aarch64/arm_neon.h (__attribute__): Delete. > > (vget_high_f16): Likewise. > > (vget_high_f32): Likewise. > > (vget_high_f64): Likewise. > > (vget_high_p8): Likewise. > > (vget_high_p16): Likewise. > > (vget_high_p64): Likewise. > > (vget_high_s8): Likewise. > > (vget_high_s16): Likewise. > > (vget_high_s32): Likewise. > > (vget_high_s64): Likewise. > > (vget_high_u8): Likewise. > > (vget_high_u16): Likewise. > > (vget_high_u32): Likewise. > > (vget_high_u64): Likewise. > > (vget_high_bf16): Likewise. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/aarch64/vget_high_2.c: New test. > > * gcc.target/aarch64/vget_high_2_be.c: New test. > > OK, thanks. Pushed as r15-778-g1d1ef1c22752b3 . Thanks, Andrew > > Richard > > > Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com> > > --- > > gcc/config/aarch64/aarch64-builtins.cc | 59 +++++++--- > > gcc/config/aarch64/aarch64-simd-builtins.def | 6 - > > gcc/config/aarch64/aarch64-simd.md | 22 ---- > > gcc/config/aarch64/arm_neon.h | 105 ------------------ > > .../gcc.target/aarch64/vget_high_2.c | 30 +++++ > > .../gcc.target/aarch64/vget_high_2_be.c | 31 ++++++ > > 6 files changed, 104 insertions(+), 149 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2.c > > create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c > > > > diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc > > index 11b888016ed..f8eeccb554d 100644 > > --- a/gcc/config/aarch64/aarch64-builtins.cc > > +++ b/gcc/config/aarch64/aarch64-builtins.cc > > @@ -675,6 +675,23 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { > > VGET_LOW_BUILTIN(u64) \ > > VGET_LOW_BUILTIN(bf16) > > > > +#define AARCH64_SIMD_VGET_HIGH_BUILTINS \ > > + VGET_HIGH_BUILTIN(f16) \ > > + VGET_HIGH_BUILTIN(f32) \ > > + VGET_HIGH_BUILTIN(f64) \ > > + VGET_HIGH_BUILTIN(p8) \ > > + VGET_HIGH_BUILTIN(p16) \ > > + VGET_HIGH_BUILTIN(p64) \ > > + VGET_HIGH_BUILTIN(s8) \ > > + VGET_HIGH_BUILTIN(s16) \ > > + VGET_HIGH_BUILTIN(s32) \ > > + VGET_HIGH_BUILTIN(s64) \ > > + VGET_HIGH_BUILTIN(u8) \ > > + VGET_HIGH_BUILTIN(u16) \ > > + VGET_HIGH_BUILTIN(u32) \ > > + VGET_HIGH_BUILTIN(u64) \ > > + VGET_HIGH_BUILTIN(bf16) > > + > > typedef struct > > { > > const char *name; > > @@ -717,6 +734,9 @@ typedef struct > > #define VGET_LOW_BUILTIN(A) \ > > AARCH64_SIMD_BUILTIN_VGET_LOW_##A, > > > > +#define VGET_HIGH_BUILTIN(A) \ > > + AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, > > + > > #undef VAR1 > > #define VAR1(T, N, MAP, FLAG, A) \ > > AARCH64_SIMD_BUILTIN_##T##_##N##A, > > @@ -753,6 +773,7 @@ enum aarch64_builtins > > /* SIMD intrinsic builtins. */ > > AARCH64_SIMD_VREINTERPRET_BUILTINS > > AARCH64_SIMD_VGET_LOW_BUILTINS > > + AARCH64_SIMD_VGET_HIGH_BUILTINS > > /* ARMv8.3-A Pointer Authentication Builtins. */ > > AARCH64_PAUTH_BUILTIN_AUTIA1716, > > AARCH64_PAUTH_BUILTIN_PACIA1716, > > @@ -855,26 +876,21 @@ static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = { > > false \ > > }, > > > > -#define AARCH64_SIMD_VGET_LOW_BUILTINS \ > > - VGET_LOW_BUILTIN(f16) \ > > - VGET_LOW_BUILTIN(f32) \ > > - VGET_LOW_BUILTIN(f64) \ > > - VGET_LOW_BUILTIN(p8) \ > > - VGET_LOW_BUILTIN(p16) \ > > - VGET_LOW_BUILTIN(p64) \ > > - VGET_LOW_BUILTIN(s8) \ > > - VGET_LOW_BUILTIN(s16) \ > > - VGET_LOW_BUILTIN(s32) \ > > - VGET_LOW_BUILTIN(s64) \ > > - VGET_LOW_BUILTIN(u8) \ > > - VGET_LOW_BUILTIN(u16) \ > > - VGET_LOW_BUILTIN(u32) \ > > - VGET_LOW_BUILTIN(u64) \ > > - VGET_LOW_BUILTIN(bf16) > > +#undef VGET_HIGH_BUILTIN > > +#define VGET_HIGH_BUILTIN(A) \ > > + {"vget_high_" #A, \ > > + AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \ > > + 2, \ > > + { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \ > > + { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \ > > + FLAG_AUTO_FP, \ > > + false \ > > + }, > > > > static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = { > > AARCH64_SIMD_VREINTERPRET_BUILTINS > > AARCH64_SIMD_VGET_LOW_BUILTINS > > + AARCH64_SIMD_VGET_HIGH_BUILTINS > > }; > > > > > > @@ -3270,6 +3286,10 @@ aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2) > > #define VGET_LOW_BUILTIN(A) \ > > case AARCH64_SIMD_BUILTIN_VGET_LOW_##A: > > > > +#undef VGET_HIGH_BUILTIN > > +#define VGET_HIGH_BUILTIN(A) \ > > + case AARCH64_SIMD_BUILTIN_VGET_HIGH_##A: > > + > > /* Try to fold a call to the built-in function with subcode FCODE. The > > function is passed the N_ARGS arguments in ARGS and it returns a value > > of type TYPE. Return the new expression on success and NULL_TREE on > > @@ -3292,6 +3312,13 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type, > > { > > auto pos = BYTES_BIG_ENDIAN ? 64 : 0; > > > > + return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64), > > + bitsize_int (pos)); > > + } > > + AARCH64_SIMD_VGET_HIGH_BUILTINS > > + { > > + auto pos = BYTES_BIG_ENDIAN ? 0 : 64; > > + > > return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64), > > bitsize_int (pos)); > > } > > diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def > > index a9f0558f8b6..e65f73d7ba2 100644 > > --- a/gcc/config/aarch64/aarch64-simd-builtins.def > > +++ b/gcc/config/aarch64/aarch64-simd-builtins.def > > @@ -65,9 +65,6 @@ > > BUILTIN_VS (UNOP, ctz, 2, NONE) > > BUILTIN_VB (UNOP, popcount, 2, NONE) > > > > - /* Implemented by aarch64_get_high<mode>. */ > > - BUILTIN_VQMOV (UNOP, get_high, 0, AUTO_FP) > > - > > /* Implemented by aarch64_<sur>q<r>shl<mode>. */ > > BUILTIN_VSDQ_I (BINOP, sqshl, 0, NONE) > > BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0, NONE) > > @@ -958,9 +955,6 @@ > > VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, FP, v4sf) > > VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, FP, v4sf) > > > > - /* Implemented by aarch64_vget_hi_halfv8bf. */ > > - VAR1 (UNOP, vget_hi_half, 0, AUTO_FP, v8bf) > > - > > /* Implemented by aarch64_simd_<sur>mmlav16qi. */ > > VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi) > > VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi) > > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md > > index 875ea52b02f..c311888e4bd 100644 > > --- a/gcc/config/aarch64/aarch64-simd.md > > +++ b/gcc/config/aarch64/aarch64-simd.md > > @@ -288,17 +288,6 @@ (define_expand "aarch64_get_half<mode>" > > } > > ) > > > > -(define_expand "aarch64_get_high<mode>" > > - [(match_operand:<VHALF> 0 "register_operand") > > - (match_operand:VQMOV 1 "register_operand")] > > - "TARGET_FLOAT" > > - { > > - rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); > > - emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi)); > > - DONE; > > - } > > -) > > - > > (define_insn_and_split "aarch64_simd_mov_from_<mode>low" > > [(set (match_operand:<VHALF> 0 "register_operand") > > (vec_select:<VHALF> > > @@ -9763,17 +9752,6 @@ (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>" > > [(set_attr "type" "neon_dot<VDQSF:q>")] > > ) > > > > -;; vget_high_bf16 > > -(define_expand "aarch64_vget_hi_halfv8bf" > > - [(match_operand:V4BF 0 "register_operand") > > - (match_operand:V8BF 1 "register_operand")] > > - "TARGET_BF16_SIMD" > > -{ > > - rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true); > > - emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); > > - DONE; > > -}) > > - > > ;; bfmmla > > (define_insn "aarch64_bfmmlaqv4sf" > > [(set (match_operand:V4SF 0 "register_operand" "=w") > > diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h > > index 92c2c5361cd..c4a09528ffd 100644 > > --- a/gcc/config/aarch64/arm_neon.h > > +++ b/gcc/config/aarch64/arm_neon.h > > @@ -3027,104 +3027,6 @@ vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index) > > return __aarch64_vset_lane_any (__elem, __vec, __index); > > } > > > > -__extension__ extern __inline float16x4_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_f16 (float16x8_t __a) > > -{ > > - return __builtin_aarch64_get_highv8hf (__a); > > -} > > - > > -__extension__ extern __inline float32x2_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_f32 (float32x4_t __a) > > -{ > > - return __builtin_aarch64_get_highv4sf (__a); > > -} > > - > > -__extension__ extern __inline float64x1_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_f64 (float64x2_t __a) > > -{ > > - return (float64x1_t) {__builtin_aarch64_get_highv2df (__a)}; > > -} > > - > > -__extension__ extern __inline poly8x8_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_p8 (poly8x16_t __a) > > -{ > > - return (poly8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a); > > -} > > - > > -__extension__ extern __inline poly16x4_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_p16 (poly16x8_t __a) > > -{ > > - return (poly16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a); > > -} > > - > > -__extension__ extern __inline poly64x1_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_p64 (poly64x2_t __a) > > -{ > > - return (poly64x1_t) __builtin_aarch64_get_highv2di ((int64x2_t) __a); > > -} > > - > > -__extension__ extern __inline int8x8_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_s8 (int8x16_t __a) > > -{ > > - return __builtin_aarch64_get_highv16qi (__a); > > -} > > - > > -__extension__ extern __inline int16x4_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_s16 (int16x8_t __a) > > -{ > > - return __builtin_aarch64_get_highv8hi (__a); > > -} > > - > > -__extension__ extern __inline int32x2_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_s32 (int32x4_t __a) > > -{ > > - return __builtin_aarch64_get_highv4si (__a); > > -} > > - > > -__extension__ extern __inline int64x1_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_s64 (int64x2_t __a) > > -{ > > - return (int64x1_t) {__builtin_aarch64_get_highv2di (__a)}; > > -} > > - > > -__extension__ extern __inline uint8x8_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_u8 (uint8x16_t __a) > > -{ > > - return (uint8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a); > > -} > > - > > -__extension__ extern __inline uint16x4_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_u16 (uint16x8_t __a) > > -{ > > - return (uint16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a); > > -} > > - > > -__extension__ extern __inline uint32x2_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_u32 (uint32x4_t __a) > > -{ > > - return (uint32x2_t) __builtin_aarch64_get_highv4si ((int32x4_t) __a); > > -} > > - > > -__extension__ extern __inline uint64x1_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_u64 (uint64x2_t __a) > > -{ > > - return (uint64x1_t) {__builtin_aarch64_get_highv2di ((int64x2_t) __a)}; > > -} > > - > > > > __extension__ extern __inline int8x16_t > > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > @@ -28381,13 +28283,6 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, > > return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); > > } > > > > -__extension__ extern __inline bfloat16x4_t > > -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > -vget_high_bf16 (bfloat16x8_t __a) > > -{ > > - return __builtin_aarch64_vget_hi_halfv8bf (__a); > > -} > > - > > __extension__ extern __inline float32x4_t > > __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) > > vcvt_f32_bf16 (bfloat16x4_t __a) > > diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c > > new file mode 100644 > > index 00000000000..9593fb685e3 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c > > @@ -0,0 +1,30 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -fdump-tree-optimized -mlittle-endian" } */ > > + > > +#include <arm_neon.h> > > + > > +#define VARIANTS \ > > +VARIANT (uint8x8_t, uint8x16_t, u8) \ > > +VARIANT (uint16x4_t, uint16x8_t, u16) \ > > +VARIANT (uint32x2_t, uint32x4_t, u32) \ > > +VARIANT (uint64x1_t, uint64x2_t, u64) \ > > +VARIANT (int8x8_t, int8x16_t, s8) \ > > +VARIANT (int16x4_t, int16x8_t, s16) \ > > +VARIANT (int32x2_t, int32x4_t, s32) \ > > +VARIANT (int64x1_t, int64x2_t, s64) \ > > +VARIANT (float16x4_t, float16x8_t, f16) \ > > +VARIANT (float32x2_t, float32x4_t, f32) \ > > +VARIANT (float64x1_t, float64x2_t, f64) \ > > +VARIANT (bfloat16x4_t, bfloat16x8_t, bf16) > > + > > +/* vget_high_* intrinsics should become BIT_FIELD_REF. */ > > +#define VARIANT(TYPE64, TYPE128, SUFFIX) \ > > +TYPE64 \ > > +test_vget_high_##SUFFIX (TYPE128 vec) \ > > +{ \ > > + return vget_high_##SUFFIX (vec); \ > > +} > > + > > +VARIANTS > > + > > +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 64>" 12 "optimized" } } */ > > diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c > > new file mode 100644 > > index 00000000000..5928c3a4597 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c > > @@ -0,0 +1,31 @@ > > +/* { dg-do compile } */ > > +/* { dg-require-effective-target stdint_types_mbig_endian } */ > > +/* { dg-options "-O3 -fdump-tree-optimized -mbig-endian" } */ > > + > > +#include <arm_neon.h> > > + > > +#define VARIANTS \ > > +VARIANT (uint8x8_t, uint8x16_t, u8) \ > > +VARIANT (uint16x4_t, uint16x8_t, u16) \ > > +VARIANT (uint32x2_t, uint32x4_t, u32) \ > > +VARIANT (uint64x1_t, uint64x2_t, u64) \ > > +VARIANT (int8x8_t, int8x16_t, s8) \ > > +VARIANT (int16x4_t, int16x8_t, s16) \ > > +VARIANT (int32x2_t, int32x4_t, s32) \ > > +VARIANT (int64x1_t, int64x2_t, s64) \ > > +VARIANT (float16x4_t, float16x8_t, f16) \ > > +VARIANT (float32x2_t, float32x4_t, f32) \ > > +VARIANT (float64x1_t, float64x2_t, f64) \ > > +VARIANT (bfloat16x4_t, bfloat16x8_t, bf16) > > + > > +/* vget_high_* intrinsics should become BIT_FIELD_REF. */ > > +#define VARIANT(TYPE64, TYPE128, SUFFIX) \ > > +TYPE64 \ > > +test_vget_high_##SUFFIX (TYPE128 vec) \ > > +{ \ > > + return vget_high_##SUFFIX (vec); \ > > +} > > + > > +VARIANTS > > + > > +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 0>" 12 "optimized" } } */
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 11b888016ed..f8eeccb554d 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -675,6 +675,23 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { VGET_LOW_BUILTIN(u64) \ VGET_LOW_BUILTIN(bf16) +#define AARCH64_SIMD_VGET_HIGH_BUILTINS \ + VGET_HIGH_BUILTIN(f16) \ + VGET_HIGH_BUILTIN(f32) \ + VGET_HIGH_BUILTIN(f64) \ + VGET_HIGH_BUILTIN(p8) \ + VGET_HIGH_BUILTIN(p16) \ + VGET_HIGH_BUILTIN(p64) \ + VGET_HIGH_BUILTIN(s8) \ + VGET_HIGH_BUILTIN(s16) \ + VGET_HIGH_BUILTIN(s32) \ + VGET_HIGH_BUILTIN(s64) \ + VGET_HIGH_BUILTIN(u8) \ + VGET_HIGH_BUILTIN(u16) \ + VGET_HIGH_BUILTIN(u32) \ + VGET_HIGH_BUILTIN(u64) \ + VGET_HIGH_BUILTIN(bf16) + typedef struct { const char *name; @@ -717,6 +734,9 @@ typedef struct #define VGET_LOW_BUILTIN(A) \ AARCH64_SIMD_BUILTIN_VGET_LOW_##A, +#define VGET_HIGH_BUILTIN(A) \ + AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, + #undef VAR1 #define VAR1(T, N, MAP, FLAG, A) \ AARCH64_SIMD_BUILTIN_##T##_##N##A, @@ -753,6 +773,7 @@ enum aarch64_builtins /* SIMD intrinsic builtins. */ AARCH64_SIMD_VREINTERPRET_BUILTINS AARCH64_SIMD_VGET_LOW_BUILTINS + AARCH64_SIMD_VGET_HIGH_BUILTINS /* ARMv8.3-A Pointer Authentication Builtins. */ AARCH64_PAUTH_BUILTIN_AUTIA1716, AARCH64_PAUTH_BUILTIN_PACIA1716, @@ -855,26 +876,21 @@ static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = { false \ }, -#define AARCH64_SIMD_VGET_LOW_BUILTINS \ - VGET_LOW_BUILTIN(f16) \ - VGET_LOW_BUILTIN(f32) \ - VGET_LOW_BUILTIN(f64) \ - VGET_LOW_BUILTIN(p8) \ - VGET_LOW_BUILTIN(p16) \ - VGET_LOW_BUILTIN(p64) \ - VGET_LOW_BUILTIN(s8) \ - VGET_LOW_BUILTIN(s16) \ - VGET_LOW_BUILTIN(s32) \ - VGET_LOW_BUILTIN(s64) \ - VGET_LOW_BUILTIN(u8) \ - VGET_LOW_BUILTIN(u16) \ - VGET_LOW_BUILTIN(u32) \ - VGET_LOW_BUILTIN(u64) \ - VGET_LOW_BUILTIN(bf16) +#undef VGET_HIGH_BUILTIN +#define VGET_HIGH_BUILTIN(A) \ + {"vget_high_" #A, \ + AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \ + 2, \ + { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \ + { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \ + FLAG_AUTO_FP, \ + false \ + }, static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = { AARCH64_SIMD_VREINTERPRET_BUILTINS AARCH64_SIMD_VGET_LOW_BUILTINS + AARCH64_SIMD_VGET_HIGH_BUILTINS }; @@ -3270,6 +3286,10 @@ aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2) #define VGET_LOW_BUILTIN(A) \ case AARCH64_SIMD_BUILTIN_VGET_LOW_##A: +#undef VGET_HIGH_BUILTIN +#define VGET_HIGH_BUILTIN(A) \ + case AARCH64_SIMD_BUILTIN_VGET_HIGH_##A: + /* Try to fold a call to the built-in function with subcode FCODE. The function is passed the N_ARGS arguments in ARGS and it returns a value of type TYPE. Return the new expression on success and NULL_TREE on @@ -3292,6 +3312,13 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type, { auto pos = BYTES_BIG_ENDIAN ? 64 : 0; + return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64), + bitsize_int (pos)); + } + AARCH64_SIMD_VGET_HIGH_BUILTINS + { + auto pos = BYTES_BIG_ENDIAN ? 0 : 64; + return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64), bitsize_int (pos)); } diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index a9f0558f8b6..e65f73d7ba2 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -65,9 +65,6 @@ BUILTIN_VS (UNOP, ctz, 2, NONE) BUILTIN_VB (UNOP, popcount, 2, NONE) - /* Implemented by aarch64_get_high<mode>. */ - BUILTIN_VQMOV (UNOP, get_high, 0, AUTO_FP) - /* Implemented by aarch64_<sur>q<r>shl<mode>. */ BUILTIN_VSDQ_I (BINOP, sqshl, 0, NONE) BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0, NONE) @@ -958,9 +955,6 @@ VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, FP, v4sf) VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, FP, v4sf) - /* Implemented by aarch64_vget_hi_halfv8bf. */ - VAR1 (UNOP, vget_hi_half, 0, AUTO_FP, v8bf) - /* Implemented by aarch64_simd_<sur>mmlav16qi. */ VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi) VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 875ea52b02f..c311888e4bd 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -288,17 +288,6 @@ (define_expand "aarch64_get_half<mode>" } ) -(define_expand "aarch64_get_high<mode>" - [(match_operand:<VHALF> 0 "register_operand") - (match_operand:VQMOV 1 "register_operand")] - "TARGET_FLOAT" - { - rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); - emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi)); - DONE; - } -) - (define_insn_and_split "aarch64_simd_mov_from_<mode>low" [(set (match_operand:<VHALF> 0 "register_operand") (vec_select:<VHALF> @@ -9763,17 +9752,6 @@ (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>" [(set_attr "type" "neon_dot<VDQSF:q>")] ) -;; vget_high_bf16 -(define_expand "aarch64_vget_hi_halfv8bf" - [(match_operand:V4BF 0 "register_operand") - (match_operand:V8BF 1 "register_operand")] - "TARGET_BF16_SIMD" -{ - rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true); - emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p)); - DONE; -}) - ;; bfmmla (define_insn "aarch64_bfmmlaqv4sf" [(set (match_operand:V4SF 0 "register_operand" "=w") diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 92c2c5361cd..c4a09528ffd 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -3027,104 +3027,6 @@ vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index) return __aarch64_vset_lane_any (__elem, __vec, __index); } -__extension__ extern __inline float16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_f16 (float16x8_t __a) -{ - return __builtin_aarch64_get_highv8hf (__a); -} - -__extension__ extern __inline float32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_f32 (float32x4_t __a) -{ - return __builtin_aarch64_get_highv4sf (__a); -} - -__extension__ extern __inline float64x1_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_f64 (float64x2_t __a) -{ - return (float64x1_t) {__builtin_aarch64_get_highv2df (__a)}; -} - -__extension__ extern __inline poly8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_p8 (poly8x16_t __a) -{ - return (poly8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a); -} - -__extension__ extern __inline poly16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_p16 (poly16x8_t __a) -{ - return (poly16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a); -} - -__extension__ extern __inline poly64x1_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_p64 (poly64x2_t __a) -{ - return (poly64x1_t) __builtin_aarch64_get_highv2di ((int64x2_t) __a); -} - -__extension__ extern __inline int8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_s8 (int8x16_t __a) -{ - return __builtin_aarch64_get_highv16qi (__a); -} - -__extension__ extern __inline int16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_s16 (int16x8_t __a) -{ - return __builtin_aarch64_get_highv8hi (__a); -} - -__extension__ extern __inline int32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_s32 (int32x4_t __a) -{ - return __builtin_aarch64_get_highv4si (__a); -} - -__extension__ extern __inline int64x1_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_s64 (int64x2_t __a) -{ - return (int64x1_t) {__builtin_aarch64_get_highv2di (__a)}; -} - -__extension__ extern __inline uint8x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_u8 (uint8x16_t __a) -{ - return (uint8x8_t) __builtin_aarch64_get_highv16qi ((int8x16_t) __a); -} - -__extension__ extern __inline uint16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_u16 (uint16x8_t __a) -{ - return (uint16x4_t) __builtin_aarch64_get_highv8hi ((int16x8_t) __a); -} - -__extension__ extern __inline uint32x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_u32 (uint32x4_t __a) -{ - return (uint32x2_t) __builtin_aarch64_get_highv4si ((int32x4_t) __a); -} - -__extension__ extern __inline uint64x1_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_u64 (uint64x2_t __a) -{ - return (uint64x1_t) {__builtin_aarch64_get_highv2di ((int64x2_t) __a)}; -} - __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -28381,13 +28283,6 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); } -__extension__ extern __inline bfloat16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vget_high_bf16 (bfloat16x8_t __a) -{ - return __builtin_aarch64_vget_hi_halfv8bf (__a); -} - __extension__ extern __inline float32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvt_f32_bf16 (bfloat16x4_t __a) diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c new file mode 100644 index 00000000000..9593fb685e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-optimized -mlittle-endian" } */ + +#include <arm_neon.h> + +#define VARIANTS \ +VARIANT (uint8x8_t, uint8x16_t, u8) \ +VARIANT (uint16x4_t, uint16x8_t, u16) \ +VARIANT (uint32x2_t, uint32x4_t, u32) \ +VARIANT (uint64x1_t, uint64x2_t, u64) \ +VARIANT (int8x8_t, int8x16_t, s8) \ +VARIANT (int16x4_t, int16x8_t, s16) \ +VARIANT (int32x2_t, int32x4_t, s32) \ +VARIANT (int64x1_t, int64x2_t, s64) \ +VARIANT (float16x4_t, float16x8_t, f16) \ +VARIANT (float32x2_t, float32x4_t, f32) \ +VARIANT (float64x1_t, float64x2_t, f64) \ +VARIANT (bfloat16x4_t, bfloat16x8_t, bf16) + +/* vget_high_* intrinsics should become BIT_FIELD_REF. */ +#define VARIANT(TYPE64, TYPE128, SUFFIX) \ +TYPE64 \ +test_vget_high_##SUFFIX (TYPE128 vec) \ +{ \ + return vget_high_##SUFFIX (vec); \ +} + +VARIANTS + +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 64>" 12 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c new file mode 100644 index 00000000000..5928c3a4597 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target stdint_types_mbig_endian } */ +/* { dg-options "-O3 -fdump-tree-optimized -mbig-endian" } */ + +#include <arm_neon.h> + +#define VARIANTS \ +VARIANT (uint8x8_t, uint8x16_t, u8) \ +VARIANT (uint16x4_t, uint16x8_t, u16) \ +VARIANT (uint32x2_t, uint32x4_t, u32) \ +VARIANT (uint64x1_t, uint64x2_t, u64) \ +VARIANT (int8x8_t, int8x16_t, s8) \ +VARIANT (int16x4_t, int16x8_t, s16) \ +VARIANT (int32x2_t, int32x4_t, s32) \ +VARIANT (int64x1_t, int64x2_t, s64) \ +VARIANT (float16x4_t, float16x8_t, f16) \ +VARIANT (float32x2_t, float32x4_t, f32) \ +VARIANT (float64x1_t, float64x2_t, f64) \ +VARIANT (bfloat16x4_t, bfloat16x8_t, bf16) + +/* vget_high_* intrinsics should become BIT_FIELD_REF. */ +#define VARIANT(TYPE64, TYPE128, SUFFIX) \ +TYPE64 \ +test_vget_high_##SUFFIX (TYPE128 vec) \ +{ \ + return vget_high_##SUFFIX (vec); \ +} + +VARIANTS + +/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF <vec_\[0-9\]*\\\(D\\\), 64, 0>" 12 "optimized" } } */
This patch is a follow-up of r15-697-ga2e4fe5a53cf75 to also fold vget_high_* intrinsics to BIT_FILED_REF and remove the vget_high_* definitions from arm_neon.h to use the new intrinsics framework. PR target/102171 gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (AARCH64_SIMD_VGET_HIGH_BUILTINS): New macro to create definitions for all vget_high intrinsics. (VGET_HIGH_BUILTIN): Likewise. (enum aarch64_builtins): Add vget_high function codes. (AARCH64_SIMD_VGET_LOW_BUILTINS): Delete duplicate macro. (aarch64_general_fold_builtin): Fold vget_high calls. * config/aarch64/aarch64-simd-builtins.def: Delete vget_high builtins. * config/aarch64/aarch64-simd.md (aarch64_get_high<mode>): Delete. (aarch64_vget_hi_halfv8bf): Likewise. * config/aarch64/arm_neon.h (__attribute__): Delete. (vget_high_f16): Likewise. (vget_high_f32): Likewise. (vget_high_f64): Likewise. (vget_high_p8): Likewise. (vget_high_p16): Likewise. (vget_high_p64): Likewise. (vget_high_s8): Likewise. (vget_high_s16): Likewise. (vget_high_s32): Likewise. (vget_high_s64): Likewise. (vget_high_u8): Likewise. (vget_high_u16): Likewise. (vget_high_u32): Likewise. (vget_high_u64): Likewise. (vget_high_bf16): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vget_high_2.c: New test. * gcc.target/aarch64/vget_high_2_be.c: New test. Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com> --- gcc/config/aarch64/aarch64-builtins.cc | 59 +++++++--- gcc/config/aarch64/aarch64-simd-builtins.def | 6 - gcc/config/aarch64/aarch64-simd.md | 22 ---- gcc/config/aarch64/arm_neon.h | 105 ------------------ .../gcc.target/aarch64/vget_high_2.c | 30 +++++ .../gcc.target/aarch64/vget_high_2_be.c | 31 ++++++ 6 files changed, 104 insertions(+), 149 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/vget_high_2_be.c