Patchwork [AArch64] Implement framework for Tree/Gimple Implementation of NEON intrinsics.

login
register
mail settings
Submitter Tejas Belagod
Date March 14, 2013, 12:49 p.m.
Message ID <5141C759.8090707@arm.com>
Download mbox | patch
Permalink /patch/227650/
State New
Headers show

Comments

Tejas Belagod - March 14, 2013, 12:49 p.m.
Hi,

Attached is a patch that implements the framework necessary for implementing
NEON Intrinsics' builtins in Tree/Gimple rather than RTL. For this it uses the
target hook TARGET_FOLD_BUILTIN and folds all the builtins for NEON Intrinsics
into equivalent trees. This framework is accompanied by an example
implementation of vaddv<q>_f<32, 64> intrinsics using the framework.

Regression tested on aarch64-none-elf. OK for trunk?

Thanks,
Tejas Belagod
ARM.

Changelog:

2013-03-14  Tejas Belagod  <tejas.belagod@arm.com>

gcc/
	* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): New.
	* config/aarch64/aarch64-protos.h (aarch64_fold_builtin): Declare.
	* config/aarch64/aarch64-simd-builtins.def: New entry for reduc_splus.
	* config/aarch64/aarch64.c (TARGET_FOLD_BUILTIN): Define.
	* config/aarch64/arm_neon.h (vaddv_f32, vaddvq_f32, vaddvq_f64): New.

testsuite/
	* gcc.target/aarch64/vaddv-intrinsic-compile.c: New.
	* gcc.target/aarch64/vaddv-intrinsic.c: New.
Marcus Shawcroft - March 15, 2013, 2:39 p.m.
On 14/03/13 12:49, Tejas Belagod wrote:
> Hi,
>
> Attached is a patch that implements the framework necessary for implementing
> NEON Intrinsics' builtins in Tree/Gimple rather than RTL. For this it uses the
> target hook TARGET_FOLD_BUILTIN and folds all the builtins for NEON Intrinsics
> into equivalent trees. This framework is accompanied by an example
> implementation of vaddv<q>_f<32, 64> intrinsics using the framework.
>
> Regression tested on aarch64-none-elf. OK for trunk?
>
> Thanks,
> Tejas Belagod
> ARM.
>
> Changelog:
>
> 2013-03-14  Tejas Belagod  <tejas.belagod@arm.com>
>
> gcc/
> 	* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): New.
> 	* config/aarch64/aarch64-protos.h (aarch64_fold_builtin): Declare.
> 	* config/aarch64/aarch64-simd-builtins.def: New entry for reduc_splus.
> 	* config/aarch64/aarch64.c (TARGET_FOLD_BUILTIN): Define.
> 	* config/aarch64/arm_neon.h (vaddv_f32, vaddvq_f32, vaddvq_f64): New.
>
> testsuite/
> 	* gcc.target/aarch64/vaddv-intrinsic-compile.c: New.
> 	* gcc.target/aarch64/vaddv-intrinsic.c: New.
>

I think we need to wait for a resolution to this thread:
http://gcc.gnu.org/ml/gcc-patches/2013-03/msg00505.html
before we can take this patch.

/Marcus

Patch

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 35475ba..a1bd032 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1254,6 +1254,31 @@  aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
 
   return NULL_TREE;
 }
+
+#undef VAR1
+#define VAR1(T, N, MAP, A) \
+  case AARCH64_SIMD_BUILTIN_##N##A:
+
+tree
+aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
+		      bool ignore ATTRIBUTE_UNUSED)
+{
+  int fcode = DECL_FUNCTION_CODE (fndecl);
+  tree type = TREE_TYPE (TREE_TYPE (fndecl));
+
+  switch (fcode)
+    {
+      BUILTIN_VDQF (UNOP, reduc_splus_, 10)
+	return fold_build1 (REDUC_PLUS_EXPR, type, args[0]);
+	break;
+
+      default:
+	break;
+    }
+
+  return NULL_TREE;
+}
+
 #undef AARCH64_CHECK_BUILTIN_MODE
 #undef AARCH64_FIND_FRINT_VARIANT
 #undef BUILTIN_DX
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 5d0072f..1bb33e8 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -177,6 +177,7 @@  rtx aarch64_simd_gen_const_vector_dup (enum machine_mode, int);
 bool aarch64_simd_mem_operand_p (rtx);
 rtx aarch64_simd_vect_par_cnst_half (enum machine_mode, bool);
 rtx aarch64_tls_get_addr (void);
+tree aarch64_fold_builtin (tree, int, tree *, bool);
 unsigned aarch64_dbx_register_number (unsigned);
 unsigned aarch64_trampoline_size (void);
 void aarch64_asm_output_labelref (FILE *, const char *);
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e18e3f3..1dd4ad6 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -238,6 +238,9 @@ 
   BUILTIN_VDQF (BINOP, fmax, 0)
   BUILTIN_VDQF (BINOP, fmin, 0)
 
+  /* Implemented by reduc_splus_<mode>.  */
+  BUILTIN_VDQF (UNOP, reduc_splus_, 10)
+
   /* Implemented by <maxmin><mode>3.  */
   BUILTIN_VDQ_BHSI (BINOP, smax, 3)
   BUILTIN_VDQ_BHSI (BINOP, smin, 3)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 45c4106..156c20e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7829,6 +7829,9 @@  aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 #undef TARGET_EXPAND_BUILTIN_VA_START
 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
 
+#undef TARGET_FOLD_BUILTIN
+#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
+
 #undef TARGET_FUNCTION_ARG
 #define TARGET_FUNCTION_ARG aarch64_function_arg
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 5e25c77..6198f99 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -19731,6 +19731,29 @@  vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
   return __a + __b;
 }
 
+/* vaddv */
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vaddv_f32 (float32x2_t __a)
+{
+  float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
+  return vget_lane_f32 (t, 0);
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vaddvq_f32 (float32x4_t __a)
+{
+  float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
+  return vgetq_lane_f32 (t, 0);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vaddvq_f64 (float64x2_t __a)
+{
+  float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
+  return vgetq_lane_f64 (t, 0);
+}
+
 /* vceq */
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
new file mode 100644
index 0000000..c736c0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
@@ -0,0 +1,36 @@ 
+
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+float32_t
+test_vaddv_v2sf (const float32_t *pool)
+{
+  float32x2_t val;
+
+  val = vld1_f32 (pool);
+  return vaddv_f32 (val);
+}
+
+float32_t
+test_vaddv_v4sf (const float32_t *pool)
+{
+  float32x4_t val;
+
+  val = vld1q_f32 (pool);
+  return vaddvq_f32 (val);
+}
+
+float64_t
+test_vaddv_v2df (const float64_t *pool)
+{
+  float64x2_t val;
+
+  val = vld1q_f64 (pool);
+  return vaddvq_f64 (val);
+}
+
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+"} } */
+/* { dg-final { scan-assembler-times "faddp\\tv\[0-9\]+\.4s" 2} } */
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+"} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
new file mode 100644
index 0000000..d324333
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
@@ -0,0 +1,53 @@ 
+
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+float32_t
+test_vaddv_v2sf (const float32_t *pool)
+{
+  float32x2_t val;
+
+  val = vld1_f32 (pool);
+  return vaddv_f32 (val);
+}
+
+float32_t
+test_vaddv_v4sf (const float32_t *pool)
+{
+  float32x4_t val;
+
+  val = vld1q_f32 (pool);
+  return vaddvq_f32 (val);
+}
+
+float64_t
+test_vaddv_v2df (const float64_t *pool)
+{
+  float64x2_t val;
+
+  val = vld1q_f64 (pool);
+  return vaddvq_f64 (val);
+}
+
+int
+main (void)
+{
+  const float32_t pool_v2sf[] = {4.0f, 9.0f};
+  const float32_t pool_v4sf[] = {4.0f, 9.0f, 16.0f, 25.0f};
+  const float64_t pool_v2df[] = {4.0, 9.0};
+
+  if (test_vaddv_v2sf (pool_v2sf) != 13.0f)
+    abort ();
+
+  if (test_vaddv_v4sf (pool_v4sf) != 54.0f)
+    abort ();
+
+  if (test_vaddv_v2df (pool_v2df) != 13.0)
+    abort ();
+
+  return 0;
+}