
[1/2,AArch64] Update Armv8.4-a's FP16 FML intrinsics

Message ID 20190220140009.GA32564@arm.com
State New
Series [1/2,AArch64] Update Armv8.4-a's FP16 FML intrinsics

Commit Message

Tamar Christina Feb. 20, 2019, 2 p.m. UTC
Hi All,

This patch updates the suffixes of the Armv8.4-a FP16 FML intrinsics from u32
to f16 to be more consistent with the naming convention for intrinsics.

The specifications for these intrinsics have not been published yet, so we do
not need to maintain the old names.

The patch was created with the following script:

grep -lIE "(vfml[as].+)_u32" -r gcc/ | grep -iEv ".+Changelog.*" \
  | xargs sed -i -E -e "s/(vfml[as].+)_u32/\1_f16/g"
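
For illustration, here is a minimal before/after sketch of one of the renamed
intrinsics (the wrapper name is invented for this example; it needs a compiler
and target with the fp16fml extension enabled, e.g. the
-march=armv8.2-a+fp16fml flags used in target-supports.exp):

#include <arm_neon.h>

/* Widening half-precision multiply-accumulate into a single-precision
   accumulator.  Only the intrinsic suffix changes; the argument and
   result types stay the same.  */
float32x2_t
example_fmlal_low (float32x2_t r, float16x4_t a, float16x4_t b)
{
  /* Previously spelled vfmlal_low_u32 (r, a, b).  */
  return vfmlal_low_f16 (r, a, b);
}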

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

OK for trunk and an eventual backport to GCC 8?

Thanks,
Tamar

gcc/ChangeLog:

2019-02-20  Tamar Christina  <tamar.christina@arm.com>

	* config/aarch64/arm_neon.h (vfmlal_low_u32, vfmlsl_low_u32,
	vfmlalq_low_u32, vfmlslq_low_u32, vfmlal_high_u32, vfmlsl_high_u32,
	vfmlalq_high_u32, vfmlslq_high_u32, vfmlal_lane_low_u32,
	vfmlsl_lane_low_u32, vfmlal_laneq_low_u32, vfmlsl_laneq_low_u32,
	vfmlalq_lane_low_u32, vfmlslq_lane_low_u32, vfmlalq_laneq_low_u32,
	vfmlslq_laneq_low_u32, vfmlal_lane_high_u32, vfmlsl_lane_high_u32,
	vfmlal_laneq_high_u32, vfmlsl_laneq_high_u32, vfmlalq_lane_high_u32,
	vfmlslq_lane_high_u32, vfmlalq_laneq_high_u32, vfmlslq_laneq_high_u32):
	Rename ...
	(vfmlal_low_f16, vfmlsl_low_f16, vfmlalq_low_f16, vfmlslq_low_f16,
	vfmlal_high_f16, vfmlsl_high_f16, vfmlalq_high_f16, vfmlslq_high_f16,
	vfmlal_lane_low_f16, vfmlsl_lane_low_f16, vfmlal_laneq_low_f16,
	vfmlsl_laneq_low_f16, vfmlalq_lane_low_f16, vfmlslq_lane_low_f16,
	vfmlalq_laneq_low_f16, vfmlslq_laneq_low_f16, vfmlal_lane_high_f16,
	vfmlsl_lane_high_f16, vfmlal_laneq_high_f16, vfmlsl_laneq_high_f16,
	vfmlalq_lane_high_f16, vfmlslq_lane_high_f16, vfmlalq_laneq_high_f16,
	vfmlslq_laneq_high_f16): ... To this.

gcc/testsuite/ChangeLog:

2019-02-20  Tamar Christina  <tamar.christina@arm.com>

	* gcc.target/aarch64/fp16_fmul_high.h (test_vfmlal_high_u32,
	test_vfmlalq_high_u32, test_vfmlsl_high_u32, test_vfmlslq_high_u32):
	Rename ...
	(test_vfmlal_high_f16, test_vfmlalq_high_f16, test_vfmlsl_high_f16,
	test_vfmlslq_high_f16): ... To this.
	* gcc.target/aarch64/fp16_fmul_lane_high.h (test_vfmlal_lane_high_u32,
	tets_vfmlsl_lane_high_u32, test_vfmlal_laneq_high_u32,
	test_vfmlsl_laneq_high_u32, test_vfmlalq_lane_high_u32,
	test_vfmlslq_lane_high_u32, test_vfmlalq_laneq_high_u32,
	test_vfmlslq_laneq_high_u32): Rename ...
	(test_vfmlal_lane_high_f16, tets_vfmlsl_lane_high_f16,
	test_vfmlal_laneq_high_f16, test_vfmlsl_laneq_high_f16,
	test_vfmlalq_lane_high_f16, test_vfmlslq_lane_high_f16,
	test_vfmlalq_laneq_high_f16, test_vfmlslq_laneq_high_f16): ... To this.
	* gcc.target/aarch64/fp16_fmul_lane_low.h (test_vfmlal_lane_low_u32,
	test_vfmlsl_lane_low_u32, test_vfmlal_laneq_low_u32,
	test_vfmlsl_laneq_low_u32, test_vfmlalq_lane_low_u32,
	test_vfmlslq_lane_low_u32, test_vfmlalq_laneq_low_u32,
	test_vfmlslq_laneq_low_u32): Rename ...
	(test_vfmlal_lane_low_f16, test_vfmlsl_lane_low_f16,
	test_vfmlal_laneq_low_f16, test_vfmlsl_laneq_low_f16,
	test_vfmlalq_lane_low_f16, test_vfmlslq_lane_low_f16,
	test_vfmlalq_laneq_low_f16, test_vfmlslq_laneq_low_f16): ... To this.
	* gcc.target/aarch64/fp16_fmul_low.h (test_vfmlal_low_u32,
	test_vfmlalq_low_u32, test_vfmlsl_low_u32, test_vfmlslq_low_u32):
	Rename ...
	(test_vfmlal_low_f16, test_vfmlalq_low_f16, test_vfmlsl_low_f16,
	test_vfmlslq_low_f16): ... To this.
	* lib/target-supports.exp
	(check_effective_target_arm_fp16fml_neon_ok_nocache): Update test.

--

Comments

James Greenhalgh Feb. 21, 2019, 10:45 p.m. UTC | #1
On Wed, Feb 20, 2019 at 08:00:13AM -0600, Tamar Christina wrote:
> Hi All,
> 
> This patch updates the suffixes of the Armv8.4-a FP16 FML intrinsics from u32
> to f16 to be more consistent with the naming convention for intrinsics.
> 
> The specifications for these intrinsics have not been published yet, so we do
> not need to maintain the old names.
> 
> The patch was created with the following script:
> 
> grep -lIE "(vfml[as].+)_u32" -r gcc/ | grep -iEv ".+Changelog.*" \
>   | xargs sed -i -E -e "s/(vfml[as].+)_u32/\1_f16/g"

Big bonus points for including this!

> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
> 
> OK for trunk and an eventual backport to GCC 8?

Woops.

Yes, OK for trunk and backport it please.

Thanks,
James

> gcc/ChangeLog:
> 
> 2019-02-20  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* config/aarch64/arm_neon.h (vfmlal_low_u32, vfmlsl_low_u32,
> 	vfmlalq_low_u32, vfmlslq_low_u32, vfmlal_high_u32, vfmlsl_high_u32,
> 	vfmlalq_high_u32, vfmlslq_high_u32, vfmlal_lane_low_u32,
> 	vfmlsl_lane_low_u32, vfmlal_laneq_low_u32, vfmlsl_laneq_low_u32,
> 	vfmlalq_lane_low_u32, vfmlslq_lane_low_u32, vfmlalq_laneq_low_u32,
> 	vfmlslq_laneq_low_u32, vfmlal_lane_high_u32, vfmlsl_lane_high_u32,
> 	vfmlal_laneq_high_u32, vfmlsl_laneq_high_u32, vfmlalq_lane_high_u32,
> 	vfmlslq_lane_high_u32, vfmlalq_laneq_high_u32, vfmlslq_laneq_high_u32):
> 	Rename ...
> 	(vfmlal_low_f16, vfmlsl_low_f16, vfmlalq_low_f16, vfmlslq_low_f16,
> 	vfmlal_high_f16, vfmlsl_high_f16, vfmlalq_high_f16, vfmlslq_high_f16,
> 	vfmlal_lane_low_f16, vfmlsl_lane_low_f16, vfmlal_laneq_low_f16,
> 	vfmlsl_laneq_low_f16, vfmlalq_lane_low_f16, vfmlslq_lane_low_f16,
> 	vfmlalq_laneq_low_f16, vfmlslq_laneq_low_f16, vfmlal_lane_high_f16,
> 	vfmlsl_lane_high_f16, vfmlal_laneq_high_f16, vfmlsl_laneq_high_f16,
> 	vfmlalq_lane_high_f16, vfmlslq_lane_high_f16, vfmlalq_laneq_high_f16,
> 	vfmlslq_laneq_high_f16): ... To this.
> 
> gcc/testsuite/ChangeLog:
> 
> 2019-02-20  Tamar Christina  <tamar.christina@arm.com>
> 
> 	* gcc.target/aarch64/fp16_fmul_high.h (test_vfmlal_high_u32,
> 	test_vfmlalq_high_u32, test_vfmlsl_high_u32, test_vfmlslq_high_u32):
> 	Rename ...
> 	(test_vfmlal_high_f16, test_vfmlalq_high_f16, test_vfmlsl_high_f16,
> 	test_vfmlslq_high_f16): ... To this.
> 	* gcc.target/aarch64/fp16_fmul_lane_high.h (test_vfmlal_lane_high_u32,
> 	tets_vfmlsl_lane_high_u32, test_vfmlal_laneq_high_u32,
> 	test_vfmlsl_laneq_high_u32, test_vfmlalq_lane_high_u32,
> 	test_vfmlslq_lane_high_u32, test_vfmlalq_laneq_high_u32,
> 	test_vfmlslq_laneq_high_u32): Rename ...
> 	(test_vfmlal_lane_high_f16, tets_vfmlsl_lane_high_f16,
> 	test_vfmlal_laneq_high_f16, test_vfmlsl_laneq_high_f16,
> 	test_vfmlalq_lane_high_f16, test_vfmlslq_lane_high_f16,
> 	test_vfmlalq_laneq_high_f16, test_vfmlslq_laneq_high_f16): ... To this.
> 	* gcc.target/aarch64/fp16_fmul_lane_low.h (test_vfmlal_lane_low_u32,
> 	test_vfmlsl_lane_low_u32, test_vfmlal_laneq_low_u32,
> 	test_vfmlsl_laneq_low_u32, test_vfmlalq_lane_low_u32,
> 	test_vfmlslq_lane_low_u32, test_vfmlalq_laneq_low_u32,
> 	test_vfmlslq_laneq_low_u32): Rename ...
> 	(test_vfmlal_lane_low_f16, test_vfmlsl_lane_low_f16,
> 	test_vfmlal_laneq_low_f16, test_vfmlsl_laneq_low_f16,
> 	test_vfmlalq_lane_low_f16, test_vfmlslq_lane_low_f16,
> 	test_vfmlalq_laneq_low_f16, test_vfmlslq_laneq_low_f16): ... To this.
> 	* gcc.target/aarch64/fp16_fmul_low.h (test_vfmlal_low_u32,
> 	test_vfmlalq_low_u32, test_vfmlsl_low_u32, test_vfmlslq_low_u32):
> 	Rename ...
> 	(test_vfmlal_low_f16, test_vfmlalq_low_f16, test_vfmlsl_low_f16,
> 	test_vfmlslq_low_f16): ... To this.
> 	* lib/target-supports.exp
> 	(check_effective_target_arm_fp16fml_neon_ok_nocache): Update test.
> 
> -- 

> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index f405a325cf5f3f8970e5f4b78322335c280fa7a4..314ef30187d1ba1882eaf5c610770d380344e920 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -33777,63 +33777,63 @@ vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
> +vfmlal_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
>  {
>    return __builtin_aarch64_fmlal_lowv2sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
> +vfmlsl_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
>  {
>    return __builtin_aarch64_fmlsl_lowv2sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
> +vfmlalq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
>  {
>    return __builtin_aarch64_fmlalq_lowv4sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
> +vfmlslq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
>  {
>    return __builtin_aarch64_fmlslq_lowv4sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
> +vfmlal_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
>  {
>    return __builtin_aarch64_fmlal_highv2sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
> +vfmlsl_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
>  {
>    return __builtin_aarch64_fmlsl_highv2sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
> +vfmlalq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
>  {
>    return __builtin_aarch64_fmlalq_highv4sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
> +vfmlslq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
>  {
>    return __builtin_aarch64_fmlslq_highv4sf (__r, __a, __b);
>  }
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
> +vfmlal_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  		     const int __lane)
>  {
>    return __builtin_aarch64_fmlal_lane_lowv2sf (__r, __a, __b, __lane);
> @@ -33841,7 +33841,7 @@ vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
> +vfmlsl_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  		     const int __lane)
>  {
>    return __builtin_aarch64_fmlsl_lane_lowv2sf (__r, __a, __b, __lane);
> @@ -33849,7 +33849,7 @@ vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
> +vfmlal_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  		      const int __lane)
>  {
>    return __builtin_aarch64_fmlal_laneq_lowv2sf (__r, __a, __b, __lane);
> @@ -33857,7 +33857,7 @@ vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
> +vfmlsl_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  		      const int __lane)
>  {
>    return __builtin_aarch64_fmlsl_laneq_lowv2sf (__r, __a, __b, __lane);
> @@ -33865,7 +33865,7 @@ vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
> +vfmlalq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  		      const int __lane)
>  {
>    return __builtin_aarch64_fmlalq_lane_lowv4sf (__r, __a, __b, __lane);
> @@ -33873,7 +33873,7 @@ vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
> +vfmlslq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  		      const int __lane)
>  {
>    return __builtin_aarch64_fmlslq_lane_lowv4sf (__r, __a, __b, __lane);
> @@ -33881,7 +33881,7 @@ vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
> +vfmlalq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>  		       const int __lane)
>  {
>    return __builtin_aarch64_fmlalq_laneq_lowv4sf (__r, __a, __b, __lane);
> @@ -33889,7 +33889,7 @@ vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
> +vfmlslq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>  		      const int __lane)
>  {
>    return __builtin_aarch64_fmlslq_laneq_lowv4sf (__r, __a, __b, __lane);
> @@ -33897,7 +33897,7 @@ vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
> +vfmlal_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  		     const int __lane)
>  {
>    return __builtin_aarch64_fmlal_lane_highv2sf (__r, __a, __b, __lane);
> @@ -33905,7 +33905,7 @@ vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
> +vfmlsl_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  		     const int __lane)
>  {
>    return __builtin_aarch64_fmlsl_lane_highv2sf (__r, __a, __b, __lane);
> @@ -33913,7 +33913,7 @@ vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
> +vfmlal_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  		      const int __lane)
>  {
>    return __builtin_aarch64_fmlal_laneq_highv2sf (__r, __a, __b, __lane);
> @@ -33921,7 +33921,7 @@ vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x2_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
> +vfmlsl_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  		      const int __lane)
>  {
>    return __builtin_aarch64_fmlsl_laneq_highv2sf (__r, __a, __b, __lane);
> @@ -33929,7 +33929,7 @@ vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
> +vfmlalq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  		      const int __lane)
>  {
>    return __builtin_aarch64_fmlalq_lane_highv4sf (__r, __a, __b, __lane);
> @@ -33937,7 +33937,7 @@ vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
> +vfmlslq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  		      const int __lane)
>  {
>    return __builtin_aarch64_fmlslq_lane_highv4sf (__r, __a, __b, __lane);
> @@ -33945,7 +33945,7 @@ vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
> +vfmlalq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>  		       const int __lane)
>  {
>    return __builtin_aarch64_fmlalq_laneq_highv4sf (__r, __a, __b, __lane);
> @@ -33953,7 +33953,7 @@ vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>  
>  __extension__ extern __inline float32x4_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
> +vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
>  		      const int __lane)
>  {
>    return __builtin_aarch64_fmlslq_laneq_highv4sf (__r, __a, __b, __lane);
> diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
> index 9c86bd19153cc0888f7b28f36d141b9fe08f535e..def85038a7208725ecb1db0888a1cc651aaa4934 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
> +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
> @@ -1,25 +1,25 @@
>  #include "arm_neon.h"
>  
>  float32x2_t
> -test_vfmlal_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlal_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlal_high_u32 (r, a, b);
> +  return vfmlal_high_f16 (r, a, b);
>  }
>  
>  float32x4_t
> -test_vfmlalq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlalq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlalq_high_u32 (r, a, b);
> +  return vfmlalq_high_f16 (r, a, b);
>  }
>  
>  float32x2_t
> -test_vfmlsl_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlsl_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlsl_high_u32 (r, a, b);
> +  return vfmlsl_high_f16 (r, a, b);
>  }
>  
>  float32x4_t
> -test_vfmlslq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlslq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlslq_high_u32 (r, a, b);
> +  return vfmlslq_high_f16 (r, a, b);
>  }
> diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
> index 1039347865e0bc79dfe351fd52f36964e7c41188..a0b95f8b81e4799a6075b0f0fca6834f73de0dc8 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
> +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
> @@ -1,49 +1,49 @@
>  #include "arm_neon.h"
>  
>  float32x2_t
> -test_vfmlal_lane_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlal_lane_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlal_lane_high_u32 (r, a, b, 0);
> +  return vfmlal_lane_high_f16 (r, a, b, 0);
>  }
>  
>  float32x2_t
> -tets_vfmlsl_lane_high_u32  (float32x2_t r, float16x4_t a, float16x4_t b)
> +tets_vfmlsl_lane_high_f16  (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlsl_lane_high_u32 (r, a, b, 0);
> +  return vfmlsl_lane_high_f16 (r, a, b, 0);
>  }
>  
>  float32x2_t
> -test_vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
> +test_vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
>  {
> -  return vfmlal_laneq_high_u32 (r, a, b, 6);
> +  return vfmlal_laneq_high_f16 (r, a, b, 6);
>  }
>  
>  float32x2_t
> -test_vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
> +test_vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
>  {
> -  return vfmlsl_laneq_high_u32 (r, a, b, 6);
> +  return vfmlsl_laneq_high_f16 (r, a, b, 6);
>  }
>  
>  float32x4_t
> -test_vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
> +test_vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
>  {
> -  return vfmlalq_lane_high_u32 (r, a, b, 1);
> +  return vfmlalq_lane_high_f16 (r, a, b, 1);
>  }
>  
>  float32x4_t
> -test_vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
> +test_vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
>  {
> -  return vfmlslq_lane_high_u32 (r, a, b, 1);
> +  return vfmlslq_lane_high_f16 (r, a, b, 1);
>  }
>  
>  float32x4_t
> -test_vfmlalq_laneq_high_u32  (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlalq_laneq_high_f16  (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlalq_laneq_high_u32 (r, a, b, 7);
> +  return vfmlalq_laneq_high_f16 (r, a, b, 7);
>  }
>  
>  float32x4_t
> -test_vfmlslq_laneq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlslq_laneq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlslq_laneq_high_u32 (r, a, b, 7);
> +  return vfmlslq_laneq_high_f16 (r, a, b, 7);
>  }
> diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
> index b689741bdb006e89f14f29b803ba6d38a62b387e..bf49829c4bec941970eaf4e32cabf65719be9eaa 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
> +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
> @@ -1,49 +1,49 @@
>  #include "arm_neon.h"
>  
>  float32x2_t
> -test_vfmlal_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlal_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlal_lane_low_u32 (r, a, b, 0);
> +  return vfmlal_lane_low_f16 (r, a, b, 0);
>  }
>  
>  float32x2_t
> -test_vfmlsl_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlsl_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlsl_lane_low_u32 (r, a, b, 0);
> +  return vfmlsl_lane_low_f16 (r, a, b, 0);
>  }
>  
>  float32x2_t
> -test_vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
> +test_vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
>  {
> -  return vfmlal_laneq_low_u32 (r, a, b, 6);
> +  return vfmlal_laneq_low_f16 (r, a, b, 6);
>  }
>  
>  float32x2_t
> -test_vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
> +test_vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
>  {
> -  return vfmlsl_laneq_low_u32 (r, a, b, 6);
> +  return vfmlsl_laneq_low_f16 (r, a, b, 6);
>  }
>  
>  float32x4_t
> -test_vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
> +test_vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
>  {
> -  return vfmlalq_lane_low_u32 (r, a, b, 1);
> +  return vfmlalq_lane_low_f16 (r, a, b, 1);
>  }
>  
>  float32x4_t
> -test_vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
> +test_vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
>  {
> -  return vfmlslq_lane_low_u32 (r, a, b, 1);
> +  return vfmlslq_lane_low_f16 (r, a, b, 1);
>  }
>  
>  float32x4_t
> -test_vfmlalq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlalq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlalq_laneq_low_u32 (r, a, b, 7);
> +  return vfmlalq_laneq_low_f16 (r, a, b, 7);
>  }
>  
>  float32x4_t
> -test_vfmlslq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlslq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlslq_laneq_low_u32 (r, a, b, 7);
> +  return vfmlslq_laneq_low_f16 (r, a, b, 7);
>  }
> diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
> index 778ca1c245c7343b38272e586a54927c7cd50bee..b039b548b5809f92a6ef0f91f6ab475b2b03866c 100644
> --- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
> +++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
> @@ -1,25 +1,25 @@
>  #include "arm_neon.h"
>  
>  float32x2_t
> -test_vfmlal_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlal_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlal_low_u32 (r, a, b);
> +  return vfmlal_low_f16 (r, a, b);
>  }
>  
>  float32x4_t
> -test_vfmlalq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlalq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlalq_low_u32 (r, a, b);
> +  return vfmlalq_low_f16 (r, a, b);
>  }
>  
>  float32x2_t
> -test_vfmlsl_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
> +test_vfmlsl_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
>  {
> -  return vfmlsl_low_u32 (r, a, b);
> +  return vfmlsl_low_f16 (r, a, b);
>  }
>  
>  float32x4_t
> -test_vfmlslq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
> +test_vfmlslq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
>  {
> -  return vfmlslq_low_u32 (r, a, b);
> +  return vfmlslq_low_f16 (r, a, b);
>  }
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index c0df467e0175cd92c688cedebb97fd4ae87e985e..21ac2ee3b4c9591ac9efad6a1567e35fc8e3291b 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -4522,7 +4522,7 @@ proc check_effective_target_arm_fp16fml_neon_ok_nocache { } {
>  		float32x2_t
>  		foo (float32x2_t r, float16x4_t a, float16x4_t b)
>  		{
> -		  return vfmlal_high_u32 (r, a, b);
> +		  return vfmlal_high_f16 (r, a, b);
>  		}
>          } "$flags -march=armv8.2-a+fp16fml"] } {
>              set et_arm_fp16fml_neon_flags "$flags -march=armv8.2-a+fp16fml"
>

Patch

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index f405a325cf5f3f8970e5f4b78322335c280fa7a4..314ef30187d1ba1882eaf5c610770d380344e920 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -33777,63 +33777,63 @@  vcmlaq_rot270_laneq_f32 (float32x4_t __r, float32x4_t __a, float32x4_t __b,
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlal_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
 {
   return __builtin_aarch64_fmlal_lowv2sf (__r, __a, __b);
 }
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlsl_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
 {
   return __builtin_aarch64_fmlsl_lowv2sf (__r, __a, __b);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlalq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
 {
   return __builtin_aarch64_fmlalq_lowv4sf (__r, __a, __b);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlslq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
 {
   return __builtin_aarch64_fmlslq_lowv4sf (__r, __a, __b);
 }
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlal_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
 {
   return __builtin_aarch64_fmlal_highv2sf (__r, __a, __b);
 }
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
+vfmlsl_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b)
 {
   return __builtin_aarch64_fmlsl_highv2sf (__r, __a, __b);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlalq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
 {
   return __builtin_aarch64_fmlalq_highv4sf (__r, __a, __b);
 }
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
+vfmlslq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b)
 {
   return __builtin_aarch64_fmlslq_highv4sf (__r, __a, __b);
 }
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlal_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 		     const int __lane)
 {
   return __builtin_aarch64_fmlal_lane_lowv2sf (__r, __a, __b, __lane);
@@ -33841,7 +33841,7 @@  vfmlal_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlsl_lane_low_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 		     const int __lane)
 {
   return __builtin_aarch64_fmlsl_lane_lowv2sf (__r, __a, __b, __lane);
@@ -33849,7 +33849,7 @@  vfmlsl_lane_low_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlal_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 		      const int __lane)
 {
   return __builtin_aarch64_fmlal_laneq_lowv2sf (__r, __a, __b, __lane);
@@ -33857,7 +33857,7 @@  vfmlal_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlsl_laneq_low_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 		      const int __lane)
 {
   return __builtin_aarch64_fmlsl_laneq_lowv2sf (__r, __a, __b, __lane);
@@ -33865,7 +33865,7 @@  vfmlsl_laneq_low_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlalq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 		      const int __lane)
 {
   return __builtin_aarch64_fmlalq_lane_lowv4sf (__r, __a, __b, __lane);
@@ -33873,7 +33873,7 @@  vfmlalq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlslq_lane_low_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 		      const int __lane)
 {
   return __builtin_aarch64_fmlslq_lane_lowv4sf (__r, __a, __b, __lane);
@@ -33881,7 +33881,7 @@  vfmlslq_lane_low_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlalq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 		       const int __lane)
 {
   return __builtin_aarch64_fmlalq_laneq_lowv4sf (__r, __a, __b, __lane);
@@ -33889,7 +33889,7 @@  vfmlalq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlslq_laneq_low_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 		      const int __lane)
 {
   return __builtin_aarch64_fmlslq_laneq_lowv4sf (__r, __a, __b, __lane);
@@ -33897,7 +33897,7 @@  vfmlslq_laneq_low_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlal_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 		     const int __lane)
 {
   return __builtin_aarch64_fmlal_lane_highv2sf (__r, __a, __b, __lane);
@@ -33905,7 +33905,7 @@  vfmlal_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
+vfmlsl_lane_high_f16 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 		     const int __lane)
 {
   return __builtin_aarch64_fmlsl_lane_highv2sf (__r, __a, __b, __lane);
@@ -33913,7 +33913,7 @@  vfmlsl_lane_high_u32 (float32x2_t __r, float16x4_t __a, float16x4_t __b,
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlal_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 		      const int __lane)
 {
   return __builtin_aarch64_fmlal_laneq_highv2sf (__r, __a, __b, __lane);
@@ -33921,7 +33921,7 @@  vfmlal_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 
 __extension__ extern __inline float32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
+vfmlsl_laneq_high_f16 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 		      const int __lane)
 {
   return __builtin_aarch64_fmlsl_laneq_highv2sf (__r, __a, __b, __lane);
@@ -33929,7 +33929,7 @@  vfmlsl_laneq_high_u32 (float32x2_t __r, float16x4_t __a, float16x8_t __b,
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlalq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 		      const int __lane)
 {
   return __builtin_aarch64_fmlalq_lane_highv4sf (__r, __a, __b, __lane);
@@ -33937,7 +33937,7 @@  vfmlalq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
+vfmlslq_lane_high_f16 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 		      const int __lane)
 {
   return __builtin_aarch64_fmlslq_lane_highv4sf (__r, __a, __b, __lane);
@@ -33945,7 +33945,7 @@  vfmlslq_lane_high_u32 (float32x4_t __r, float16x8_t __a, float16x4_t __b,
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlalq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 		       const int __lane)
 {
   return __builtin_aarch64_fmlalq_laneq_highv4sf (__r, __a, __b, __lane);
@@ -33953,7 +33953,7 @@  vfmlalq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 
 __extension__ extern __inline float32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vfmlslq_laneq_high_u32 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
+vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b,
 		      const int __lane)
 {
   return __builtin_aarch64_fmlslq_laneq_highv4sf (__r, __a, __b, __lane);
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
index 9c86bd19153cc0888f7b28f36d141b9fe08f535e..def85038a7208725ecb1db0888a1cc651aaa4934 100644
--- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_high.h
@@ -1,25 +1,25 @@ 
 #include "arm_neon.h"
 
 float32x2_t
-test_vfmlal_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlal_high_u32 (r, a, b);
+  return vfmlal_high_f16 (r, a, b);
 }
 
 float32x4_t
-test_vfmlalq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlalq_high_u32 (r, a, b);
+  return vfmlalq_high_f16 (r, a, b);
 }
 
 float32x2_t
-test_vfmlsl_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlsl_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlsl_high_u32 (r, a, b);
+  return vfmlsl_high_f16 (r, a, b);
 }
 
 float32x4_t
-test_vfmlslq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlslq_high_u32 (r, a, b);
+  return vfmlslq_high_f16 (r, a, b);
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
index 1039347865e0bc79dfe351fd52f36964e7c41188..a0b95f8b81e4799a6075b0f0fca6834f73de0dc8 100644
--- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_high.h
@@ -1,49 +1,49 @@ 
 #include "arm_neon.h"
 
 float32x2_t
-test_vfmlal_lane_high_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_lane_high_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlal_lane_high_u32 (r, a, b, 0);
+  return vfmlal_lane_high_f16 (r, a, b, 0);
 }
 
 float32x2_t
-tets_vfmlsl_lane_high_u32  (float32x2_t r, float16x4_t a, float16x4_t b)
+tets_vfmlsl_lane_high_f16  (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlsl_lane_high_u32 (r, a, b, 0);
+  return vfmlsl_lane_high_f16 (r, a, b, 0);
 }
 
 float32x2_t
-test_vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
 {
-  return vfmlal_laneq_high_u32 (r, a, b, 6);
+  return vfmlal_laneq_high_f16 (r, a, b, 6);
 }
 
 float32x2_t
-test_vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
 {
-  return vfmlsl_laneq_high_u32 (r, a, b, 6);
+  return vfmlsl_laneq_high_f16 (r, a, b, 6);
 }
 
 float32x4_t
-test_vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
 {
-  return vfmlalq_lane_high_u32 (r, a, b, 1);
+  return vfmlalq_lane_high_f16 (r, a, b, 1);
 }
 
 float32x4_t
-test_vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
 {
-  return vfmlslq_lane_high_u32 (r, a, b, 1);
+  return vfmlslq_lane_high_f16 (r, a, b, 1);
 }
 
 float32x4_t
-test_vfmlalq_laneq_high_u32  (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_laneq_high_f16  (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlalq_laneq_high_u32 (r, a, b, 7);
+  return vfmlalq_laneq_high_f16 (r, a, b, 7);
 }
 
 float32x4_t
-test_vfmlslq_laneq_high_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_laneq_high_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlslq_laneq_high_u32 (r, a, b, 7);
+  return vfmlslq_laneq_high_f16 (r, a, b, 7);
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
index b689741bdb006e89f14f29b803ba6d38a62b387e..bf49829c4bec941970eaf4e32cabf65719be9eaa 100644
--- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_lane_low.h
@@ -1,49 +1,49 @@ 
 #include "arm_neon.h"
 
 float32x2_t
-test_vfmlal_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlal_lane_low_u32 (r, a, b, 0);
+  return vfmlal_lane_low_f16 (r, a, b, 0);
 }
 
 float32x2_t
-test_vfmlsl_lane_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlsl_lane_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlsl_lane_low_u32 (r, a, b, 0);
+  return vfmlsl_lane_low_f16 (r, a, b, 0);
 }
 
 float32x2_t
-test_vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
 {
-  return vfmlal_laneq_low_u32 (r, a, b, 6);
+  return vfmlal_laneq_low_f16 (r, a, b, 6);
 }
 
 float32x2_t
-test_vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b)
+test_vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b)
 {
-  return vfmlsl_laneq_low_u32 (r, a, b, 6);
+  return vfmlsl_laneq_low_f16 (r, a, b, 6);
 }
 
 float32x4_t
-test_vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
 {
-  return vfmlalq_lane_low_u32 (r, a, b, 1);
+  return vfmlalq_lane_low_f16 (r, a, b, 1);
 }
 
 float32x4_t
-test_vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b)
+test_vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b)
 {
-  return vfmlslq_lane_low_u32 (r, a, b, 1);
+  return vfmlslq_lane_low_f16 (r, a, b, 1);
 }
 
 float32x4_t
-test_vfmlalq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlalq_laneq_low_u32 (r, a, b, 7);
+  return vfmlalq_laneq_low_f16 (r, a, b, 7);
 }
 
 float32x4_t
-test_vfmlslq_laneq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_laneq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlslq_laneq_low_u32 (r, a, b, 7);
+  return vfmlslq_laneq_low_f16 (r, a, b, 7);
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
index 778ca1c245c7343b38272e586a54927c7cd50bee..b039b548b5809f92a6ef0f91f6ab475b2b03866c 100644
--- a/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
+++ b/gcc/testsuite/gcc.target/aarch64/fp16_fmul_low.h
@@ -1,25 +1,25 @@ 
 #include "arm_neon.h"
 
 float32x2_t
-test_vfmlal_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlal_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlal_low_u32 (r, a, b);
+  return vfmlal_low_f16 (r, a, b);
 }
 
 float32x4_t
-test_vfmlalq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlalq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlalq_low_u32 (r, a, b);
+  return vfmlalq_low_f16 (r, a, b);
 }
 
 float32x2_t
-test_vfmlsl_low_u32 (float32x2_t r, float16x4_t a, float16x4_t b)
+test_vfmlsl_low_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
 {
-  return vfmlsl_low_u32 (r, a, b);
+  return vfmlsl_low_f16 (r, a, b);
 }
 
 float32x4_t
-test_vfmlslq_low_u32 (float32x4_t r, float16x8_t a, float16x8_t b)
+test_vfmlslq_low_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
 {
-  return vfmlslq_low_u32 (r, a, b);
+  return vfmlslq_low_f16 (r, a, b);
 }
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index c0df467e0175cd92c688cedebb97fd4ae87e985e..21ac2ee3b4c9591ac9efad6a1567e35fc8e3291b 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4522,7 +4522,7 @@  proc check_effective_target_arm_fp16fml_neon_ok_nocache { } {
 		float32x2_t
 		foo (float32x2_t r, float16x4_t a, float16x4_t b)
 		{
-		  return vfmlal_high_u32 (r, a, b);
+		  return vfmlal_high_f16 (r, a, b);
 		}
         } "$flags -march=armv8.2-a+fp16fml"] } {
             set et_arm_fp16fml_neon_flags "$flags -march=armv8.2-a+fp16fml"