diff mbox

[7/7,ARM] Add ACLE intrinsics vqrdmlah_lane and vqrdmlsh_lane

Message ID 56572EFC.2030407@foss.arm.com
State New
Headers show

Commit Message

Matthew Wahab Nov. 26, 2015, 4:10 p.m. UTC
Attached the missing patch.
Matthew

On 26/11/15 16:04, Matthew Wahab wrote:
> Hello,
>
> This patch adds the ACLE intrinsics for the instructions introduced in
> ARMv8.1. It adds the vqrdmlah_lane and vqrdmlsh_lane forms of the
> intrinsics to the arm_neon.h header, together with the ARM builtins
> used to implement them. The intrinsics are available when
> -march=armv8.1-a is enabled together with appropriate fpu options.
>
> Tested the series for arm-none-eabi with cross-compiled check-gcc on an
> ARMv8.1 emulator. Also tested arm-none-linux-gnueabihf with native
> bootstrap and make check.
>
> Ok for trunk?
> Matthew
>
> gcc/
> 2015-11-26  Matthew Wahab  <matthew.wahab@arm.com>
>
>      * config/arm/arm_neon.h (vqrdmlahq_lane_s16): New.
>      (vqrdmlahq_lane_s32): New.
>      (vqrdmlah_lane_s16): New.
>      (vqrdmlah_lane_s32): New.
>      (vqrdmlshq_lane_s16): New.
>      (vqrdmlshq_lane_s32): New.
>      (vqrdmlsh_lane_s16): New.
>      (vqrdmlsh_lane_s32): New.
>      * config/arm/arm_neon_builtins.def: Add "vqrdmlah_lane" and
>      "vqrdmlsh_lane".
>
diff mbox

Patch

From cdfee6be49e52056de8999fbc33a432f2cc7254f Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wahab@arm.com>
Date: Tue, 1 Sep 2015 16:22:34 +0100
Subject: [PATCH 7/7] [ARM] Add neon intrinsics vqrdmlah_lane, vqrdmlsh_lane.

Change-Id: Ia0ab4bbe683af2d019d18a34302a7b9798193a79
---
 gcc/config/arm/arm_neon.h            | 50 ++++++++++++++++++++++++++++++++++++
 gcc/config/arm/arm_neon_builtins.def |  2 ++
 2 files changed, 52 insertions(+)

diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index b617f80..ed50253 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -7096,6 +7096,56 @@  vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
   return (int32x2_t)__builtin_neon_vqrdmulh_lanev2si (__a, __b, __c);
 }
 
+#ifdef __ARM_FEATURE_QRDMX
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
+{
+  return (int16x8_t)__builtin_neon_vqrdmlah_lanev8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
+{
+  return (int32x4_t)__builtin_neon_vqrdmlah_lanev4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+  return (int16x4_t)__builtin_neon_vqrdmlah_lanev4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+  return (int32x2_t)__builtin_neon_vqrdmlah_lanev2si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
+{
+  return (int16x8_t)__builtin_neon_vqrdmlsh_lanev8hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
+{
+  return (int32x4_t)__builtin_neon_vqrdmlsh_lanev4si (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
+{
+  return (int16x4_t)__builtin_neon_vqrdmlsh_lanev4hi (__a, __b, __c, __d);
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
+{
+  return (int32x2_t)__builtin_neon_vqrdmlsh_lanev2si (__a, __b, __c, __d);
+}
+#endif
+
 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vmul_n_s16 (int16x4_t __a, int16_t __b)
 {
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 8d5c0ca..1fdb2a8 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -60,6 +60,8 @@  VAR4 (BINOP, vqdmulh_n, v4hi, v2si, v8hi, v4si)
 VAR4 (BINOP, vqrdmulh_n, v4hi, v2si, v8hi, v4si)
 VAR4 (SETLANE, vqdmulh_lane, v4hi, v2si, v8hi, v4si)
 VAR4 (SETLANE, vqrdmulh_lane, v4hi, v2si, v8hi, v4si)
+VAR4 (MAC_LANE, vqrdmlah_lane, v4hi, v2si, v8hi, v4si)
+VAR4 (MAC_LANE, vqrdmlsh_lane, v4hi, v2si, v8hi, v4si)
 VAR2 (BINOP, vqdmull, v4hi, v2si)
 VAR8 (BINOP, vshls, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
 VAR8 (BINOP, vshlu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di)
-- 
2.1.4