
[AArch64,10/14] ARMv8.2-A FP16 lane scalar intrinsics

Message ID 1328362a-8fc3-b217-b68d-d5e4549a6dbe@foss.arm.com
State New

Commit Message

Jiong Wang July 7, 2016, 4:18 p.m. UTC
This patch adds the ARMv8.2-A FP16 lane scalar intrinsics to the AArch64 arm_neon.h; a short usage sketch follows the ChangeLog.

gcc/
2016-07-07  Jiong Wang  <jiong.wang@arm.com>

         * config/aarch64/arm_neon.h (vfmah_lane_f16): New.
         (vfmah_laneq_f16): Likewise.
         (vfmsh_lane_f16): Likewise.
         (vfmsh_laneq_f16): Likewise.
         (vmulh_lane_f16): Likewise.
         (vmulh_laneq_f16): Likewise.
         (vmulxh_lane_f16): Likewise.
         (vmulxh_laneq_f16): Likewise.
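
For context, here is a minimal usage sketch (editorial, not part of the
patch) exercising two of the new intrinsics.  It assumes an AArch64 GCC
with ARMv8.2-A FP16 support, e.g. built with -march=armv8.2-a+fp16; the
function names are illustrative only.

#include <arm_neon.h>

float16_t
fma_from_lane (float16_t acc, float16_t x, float16x4_t v)
{
  /* acc + x * v[1]; the lane index must be a constant expression.  */
  return vfmah_lane_f16 (acc, x, v, 1);
}

float16_t
scale_by_high_lane (float16_t x, float16x8_t v)
{
  /* x * v[7], via the 128-bit "laneq" variant.  */
  return vmulh_laneq_f16 (x, v, 7);
}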

Comments

James Greenhalgh July 25, 2016, 11:15 a.m. UTC | #1
On Thu, Jul 07, 2016 at 05:18:29PM +0100, Jiong Wang wrote:
> This patch adds ARMv8.2-A FP16 lane scalar intrinsics.

OK.

Thanks,
James

> 
> gcc/
> 2016-07-07  Jiong Wang <jiong.wang@arm.com>
> 
>         * config/aarch64/arm_neon.h (vfmah_lane_f16): New.
>         (vfmah_laneq_f16): Likewise.
>         (vfmsh_lane_f16): Likewise.
>         (vfmsh_laneq_f16): Likewise.
>         (vmulh_lane_f16): Likewise.
>         (vmulh_laneq_f16): Likewise.
>         (vmulxh_lane_f16): Likewise.
>         (vmulxh_laneq_f16): Likewise.
>

Patch

From bcbe5035746c5684a3b9f0b62310f6aa276db364 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@arm.com>
Date: Thu, 9 Jun 2016 11:06:29 +0100
Subject: [PATCH 10/14] ARMv8.2-A FP16 lane scalar intrinsics

---
 gcc/config/aarch64/arm_neon.h | 52 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e727ff1..09095d1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -26488,6 +26488,20 @@  vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c)
 
 /* ARMv8.2-A FP16 lane vector intrinsics.  */
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmah_lane_f16 (float16_t __a, float16_t __b,
+		float16x4_t __c, const int __lane)
+{
+  return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmah_laneq_f16 (float16_t __a, float16_t __b,
+		 float16x8_t __c, const int __lane)
+{
+  return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vfma_lane_f16 (float16x4_t __a, float16x4_t __b,
 	       float16x4_t __c, const int __lane)
@@ -26528,6 +26542,20 @@  vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
   return vfmaq_f16 (__a, __b, vdupq_n_f16 (__c));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmsh_lane_f16 (float16_t __a, float16_t __b,
+		float16x4_t __c, const int __lane)
+{
+  return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vfmsh_laneq_f16 (float16_t __a, float16_t __b,
+		 float16x8_t __c, const int __lane)
+{
+  return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vfms_lane_f16 (float16x4_t __a, float16x4_t __b,
 	       float16x4_t __c, const int __lane)
@@ -26568,6 +26596,12 @@  vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c)
   return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane)
+{
+  return __a * __aarch64_vget_lane_any (__b, __lane);
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
 {
@@ -26580,6 +26614,12 @@  vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
   return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane)));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane)
+{
+  return __a * __aarch64_vget_lane_any (__b, __lane);
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
 {
@@ -26604,6 +26644,12 @@  vmulq_n_f16 (float16x8_t __a, float16_t __b)
   return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0);
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulxh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane)
+{
+  return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane)
 {
@@ -26616,6 +26662,12 @@  vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane)
   return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane));
 }
 
+__extension__ static __inline float16_t __attribute__ ((__always_inline__))
+vmulxh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane)
+{
+  return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane));
+}
+
 __extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
 vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane)
 {
-- 
2.5.0
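
All eight new intrinsics follow the same shape: extract the requested
lane with the internal __aarch64_vget_lane_any helper, then forward to
the existing scalar operation (a plain C multiply in the vmulh_lane /
vmulh_laneq cases).  As an illustrative (editorial) equivalence sketch,
assuming the public vget_lane_f16 intrinsic is available:

#include <arm_neon.h>

float16_t
mulx_lane_expanded (float16_t a, float16x4_t b)
{
  /* vmulxh_lane_f16 (a, b, 2) computes the same value as: */
  return vmulxh_f16 (a, vget_lane_f16 (b, 2));
}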