Patchwork [AArch64] Implement TARGET_GIMPLE_FOLD_BUILTIN for aarch64 backend.

login
register
mail settings
Submitter James Greenhalgh
Date April 25, 2013, 10:29 a.m.
Message ID <1366885760-5375-1-git-send-email-james.greenhalgh@arm.com>
Download mbox | patch
Permalink /patch/239470/
State New
Headers show

Comments

James Greenhalgh - April 25, 2013, 10:29 a.m.
Hi,

This patch supersedes the patch by Tejas here:
http://gcc.gnu.org/ml/gcc-patches/2013-03/msg00502.html

The patch uses the new TARGET_GIMPLE_FOLD_BUILTIN mechanism rather
than TARGET_FOLD_BUILTIN.

This patch therefore adds the infrastructure for
TARGET_GIMPLE_FOLD_BUILTIN and an implementation folding addv
to REDUC_PLUS_EXPR.

Regression tested on aarch64-none-elf with no regressions.

Thanks,
James Greenhalgh

---
gcc/

2013-04-25  James Greenhalgh  <james.greenhalgh@arm.com>
	    Tejas Belagod  <tejas.belagod@arm.com>

	* config/aarch64/aarch64-builtins.c
	(aarch64_gimple_fold_builtin): New.
	* config/aarch64/aarch64-protos.h (aarch64_gimple_fold_builtin): New.
	* config/aarch64/aarch64-simd-builtins.def (addv): New.
	* config/aarch64/aarch64-simd.md (addpv4sf): New.
	(addvv4sf): Update.
	* config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define.

gcc/testsuite/

2013-04-25  James Greenhalgh  <james.greenhalgh@arm.com>
	    Tejas Belagod  <tejas.belagod@arm.com>

	* gcc.target/aarch64/vaddv-intrinsic.c: New.
	* gcc.target/aarch64/vaddv-intrinsic-compile.c: Likewise.
	* gcc.target/aarch64/vaddv-intrinsic.x: Likewise.
Marcus Shawcroft - April 25, 2013, 11:57 a.m.
OK
/Marcus

On 25 April 2013 11:29, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>
> Hi,
>
> This patch supersedes the patch by Tejas here:
> http://gcc.gnu.org/ml/gcc-patches/2013-03/msg00502.html
>
> The patch uses the new TARGET_GIMPLE_FOLD_BUILTIN mechanism rather
> than TARGET_FOLD_BUILTIN.
>
> This patch therefore adds the infrastructure for
> TARGET_GIMPLE_FOLD_BUILTIN and an implementation folding addv
> to REDUC_PLUS_EXPR.
>
> Regression tested on aarch64-none-elf with no regressions.
>
> Thanks,
> James Greenhalgh
>
> ---
> gcc/
>
> 2013-04-25  James Greenhalgh  <james.greenhalgh@arm.com>
>             Tejas Belagod  <tejas.belagod@arm.com>
>
>         * config/aarch64/aarch64-builtins.c
>         (aarch64_gimple_fold_builtin): New.
>         * config/aarch64/aarch64-protos.h (aarch64_gimple_fold_builtin): New.
>         * config/aarch64/aarch64-simd-builtins.def (addv): New.
>         * config/aarch64/aarch64-simd.md (addpv4sf): New.
>         (addvv4sf): Update.
>         * config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define.
>
> gcc/testsuite/
>
> 2013-04-25  James Greenhalgh  <james.greenhalgh@arm.com>
>             Tejas Belagod  <tejas.belagod@arm.com>
>
>         * gcc.target/aarch64/vaddv-intrinsic.c: New.
>         * gcc.target/aarch64/vaddv-intrinsic-compile.c: Likewise.
>         * gcc.target/aarch64/vaddv-intrinsic.x: Likewise.

Patch

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 35475ba..a786945 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -30,6 +30,7 @@ 
 #include "langhooks.h"
 #include "diagnostic-core.h"
 #include "optabs.h"
+#include "gimple.h"
 
 enum aarch64_simd_builtin_type_mode
 {
@@ -1254,6 +1255,54 @@  aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
 
   return NULL_TREE;
 }
+
+#undef VAR1
+#define VAR1(T, N, MAP, A) \
+  case AARCH64_SIMD_BUILTIN_##N##A:
+
+bool
+aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
+{
+  bool changed = false;
+  gimple stmt = gsi_stmt (*gsi);
+  tree call = gimple_call_fn (stmt);
+  tree fndecl;
+  gimple new_stmt = NULL;
+  if (call)
+    {
+      fndecl = gimple_call_fndecl (stmt);
+      if (fndecl)
+	{
+	  int fcode = DECL_FUNCTION_CODE (fndecl);
+	  int nargs = gimple_call_num_args (stmt);
+	  tree *args = (nargs > 0
+			? gimple_call_arg_ptr (stmt, 0)
+			: &error_mark_node);
+
+	  switch (fcode)
+	    {
+	      BUILTIN_VDQF (UNOP, addv, 0)
+		new_stmt = gimple_build_assign_with_ops (
+						REDUC_PLUS_EXPR,
+						gimple_call_lhs (stmt),
+						args[0],
+						NULL_TREE);
+		break;
+	    default:
+	      break;
+	    }
+	}
+    }
+
+  if (new_stmt)
+    {
+      gsi_replace (gsi, new_stmt, true);
+      changed = true;
+    }
+
+  return changed;
+}
+
 #undef AARCH64_CHECK_BUILTIN_MODE
 #undef AARCH64_FIND_FRINT_VARIANT
 #undef BUILTIN_DX
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 7ebbf51..bf02b99 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -140,6 +140,7 @@  bool aarch64_constant_address_p (rtx);
 bool aarch64_float_const_zero_rtx_p (rtx);
 bool aarch64_function_arg_regno_p (unsigned);
 bool aarch64_gen_movmemqi (rtx *);
+bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
 bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
 bool aarch64_is_long_call_p (rtx);
 bool aarch64_label_mentioned_p (rtx);
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e18e3f3..534e23b 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -238,6 +238,9 @@ 
   BUILTIN_VDQF (BINOP, fmax, 0)
   BUILTIN_VDQF (BINOP, fmin, 0)
 
+  /* Implemented by aarch64_addv<mode>.  */
+  BUILTIN_VDQF (UNOP, addv, 0)
+
   /* Implemented by <maxmin><mode>3.  */
   BUILTIN_VDQ_BHSI (BINOP, smax, 3)
   BUILTIN_VDQ_BHSI (BINOP, smin, 3)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9b42365..e5506fc 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1341,7 +1341,7 @@ 
 
 ;; FP 'across lanes' add.
 
-(define_insn "aarch64_addvv4sf"
+(define_insn "aarch64_addpv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
 		    UNSPEC_FADDV))]
@@ -1357,8 +1357,8 @@ 
  "TARGET_SIMD"
 {
   rtx tmp = gen_reg_rtx (V4SFmode);
-  emit_insn (gen_aarch64_addvv4sf (tmp, operands[1]));
-  emit_insn (gen_aarch64_addvv4sf (operands[0], tmp));
+  emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
+  emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
   DONE;
 })
 
@@ -1368,8 +1368,18 @@ 
  "TARGET_SIMD"
 {
   rtx tmp = gen_reg_rtx (V4SFmode);
-  emit_insn (gen_aarch64_addvv4sf (tmp, operands[1]));
-  emit_insn (gen_aarch64_addvv4sf (operands[0], tmp));
+  emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
+  emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
+  DONE;
+})
+
+(define_expand "aarch64_addvv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
+		     UNSPEC_FADDV))]
+ "TARGET_SIMD"
+{
+  emit_insn (gen_reduc_splus_v4sf (operands[0], operands[1]));
   DONE;
 })
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 68f847a..22903e0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7895,6 +7895,9 @@  aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 #undef TARGET_FRAME_POINTER_REQUIRED
 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
 
+#undef TARGET_GIMPLE_FOLD_BUILTIN
+#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
+
 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 58343a7..e7ba000 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -19731,6 +19731,27 @@  vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
   return __a + __b;
 }
 
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vaddv_f32 (float32x2_t __a)
+{
+  float32x2_t t = __builtin_aarch64_addvv2sf (__a);
+  return vget_lane_f32 (t, 0);
+}
+
+__extension__ static __inline float32_t __attribute__ ((__always_inline__))
+vaddvq_f32 (float32x4_t __a)
+{
+  float32x4_t t = __builtin_aarch64_addvv4sf (__a);
+  return vgetq_lane_f32 (t, 0);
+}
+
+__extension__ static __inline float64_t __attribute__ ((__always_inline__))
+vaddvq_f64 (float64x2_t __a)
+{
+  float64x2_t t = __builtin_aarch64_addvv2df (__a);
+  return vgetq_lane_f64 (t, 0);
+}
+
 /* vceq */
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
diff --git a/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
new file mode 100644
index 0000000..11fa984
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic-compile.c
@@ -0,0 +1,11 @@ 
+
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+#include "vaddv-intrinsic.x"
+
+/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+"} } */
+/* { dg-final { scan-assembler-times "faddp\\tv\[0-9\]+\.4s" 2} } */
+/* { dg-final { scan-assembler "faddp\\td\[0-9\]+"} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
new file mode 100644
index 0000000..f6e0829
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.c
@@ -0,0 +1,28 @@ 
+
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include "arm_neon.h"
+
+extern void abort (void);
+
+#include "vaddv-intrinsic.x"
+
+int
+main (void)
+{
+  const float32_t pool_v2sf[] = {4.0f, 9.0f};
+  const float32_t pool_v4sf[] = {4.0f, 9.0f, 16.0f, 25.0f};
+  const float64_t pool_v2df[] = {4.0, 9.0};
+
+  if (test_vaddv_v2sf (pool_v2sf) != 13.0f)
+    abort ();
+
+  if (test_vaddv_v4sf (pool_v4sf) != 54.0f)
+    abort ();
+
+  if (test_vaddv_v2df (pool_v2df) != 13.0)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x
new file mode 100644
index 0000000..7bf38ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vaddv-intrinsic.x
@@ -0,0 +1,27 @@ 
+
+float32_t
+test_vaddv_v2sf (const float32_t *pool)
+{
+  float32x2_t val;
+
+  val = vld1_f32 (pool);
+  return vaddv_f32 (val);
+}
+
+float32_t
+test_vaddv_v4sf (const float32_t *pool)
+{
+  float32x4_t val;
+
+  val = vld1q_f32 (pool);
+  return vaddvq_f32 (val);
+}
+
+float64_t
+test_vaddv_v2df (const float64_t *pool)
+{
+  float64x2_t val;
+
+  val = vld1q_f64 (pool);
+  return vaddvq_f64 (val);
+}