diff mbox

[6/14,AArch64] Restore gimple_folding of reduction intrinsics

Message ID 541ACA76.5030107@arm.com
State New
Headers show

Commit Message

Alan Lawrence Sept. 18, 2014, 12:05 p.m. UTC
This gives us back the constant-folding of the neon-intrinsics that was removed 
in the first patch, but is now OK for bigendian too.

bootstrapped on aarch64-none-linux-gnu.
check-gcc on aarch64-none-elf and aarch64_be-none-elf.

gcc/ChangeLog:

	* config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define again.
	* config/aarch64/aarch64-builtins.c (aarch64_gimple_fold_builtin):
	Restore, enable for bigendian, update to use __builtin..._scal...

Comments

Marcus Shawcroft Sept. 24, 2014, 9:48 a.m. UTC | #1
On 18 September 2014 13:05, Alan Lawrence <alan.lawrence@arm.com> wrote:
> This gives us back the constant-folding of the neon-intrinsics that was
> removed in the first patch, but is now OK for bigendian too.
>
> bootstrapped on aarch64-none-linux-gnu.
> check-gcc on aarch64-none-elf and aarch64_be-none-elf.
>
> gcc/ChangeLog:
>
>         * config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define
> again.
>         * config/aarch64/aarch64-builtins.c (aarch64_gimple_fold_builtin):
>         Restore, enable for bigendian, update to use __builtin..._scal...

OK /Marcus
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 15eb7c686d95b1d66cbd514500ec29ba074eaa3f..0432d3aa1a515a15b051ba89afec7c0306cb5803 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1333,9 +1333,6 @@  aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
   return NULL_TREE;
 }
 
-/* Handling of reduction operations temporarily removed so as to decouple
-   changes to tree codes from AArch64 NEON Intrinsics.  */
-#if 0
 bool
 aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 {
@@ -1345,19 +1342,6 @@  aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   tree fndecl;
   gimple new_stmt = NULL;
 
-  /* The operations folded below are reduction operations.  These are
-     defined to leave their result in the 0'th element (from the perspective
-     of GCC).  The architectural instruction we are folding will leave the
-     result in the 0'th element (from the perspective of the architecture).
-     For big-endian systems, these perspectives are not aligned.
-
-     It is therefore wrong to perform this fold on big-endian.  There
-     are some tricks we could play with shuffling, but the mid-end is
-     inconsistent in the way it treats reduction operations, so we will
-     end up in difficulty.  Until we fix the ambiguity - just bail out.  */
-  if (BYTES_BIG_ENDIAN)
-    return false;
-
   if (call)
     {
       fndecl = gimple_call_fndecl (stmt);
@@ -1369,23 +1353,28 @@  aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 			? gimple_call_arg_ptr (stmt, 0)
 			: &error_mark_node);
 
+	  /* We use gimple's REDUC_(PLUS|MIN|MAX)_EXPRs for float, signed int
+	     and unsigned int; it will distinguish according to the types of
+	     the arguments to the __builtin.  */
 	  switch (fcode)
 	    {
-	      BUILTIN_VALL (UNOP, reduc_splus_, 10)
-		new_stmt = gimple_build_assign_with_ops (
+	      BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
+	        new_stmt = gimple_build_assign_with_ops (
 						REDUC_PLUS_EXPR,
 						gimple_call_lhs (stmt),
 						args[0],
 						NULL_TREE);
 		break;
-	      BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
+	      BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
+	      BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
 		new_stmt = gimple_build_assign_with_ops (
 						REDUC_MAX_EXPR,
 						gimple_call_lhs (stmt),
 						args[0],
 						NULL_TREE);
 		break;
-	      BUILTIN_VDQIF (UNOP, reduc_smin_, 10)
+	      BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
+	      BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
 		new_stmt = gimple_build_assign_with_ops (
 						REDUC_MIN_EXPR,
 						gimple_call_lhs (stmt),
@@ -1407,7 +1396,6 @@  aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 
   return changed;
 }
-#endif
 
 void
 aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9197ec038b7d40a601c886b846113c50a29cf5e2..e7946fc0b70ced70a4e98caa0a33121f29242aad 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9925,8 +9925,8 @@  aarch64_expand_movmem (rtx *operands)
 #undef TARGET_FRAME_POINTER_REQUIRED
 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
 
-//#undef TARGET_GIMPLE_FOLD_BUILTIN
-//#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
+#undef TARGET_GIMPLE_FOLD_BUILTIN
+#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
 
 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr