diff mbox

[AARCH64,5/5] Add macro fusion support for cmp/b.X for ThunderX

Message ID 546B2098.8000402@arm.com
State New
Headers show

Commit Message

Kyrylo Tkachov Nov. 18, 2014, 10:34 a.m. UTC
Hi all,

This is a rebase of Andrews' CMP+BRANCH fusion patch on top of my macro 
fusion patches.
I've assigned the number 1<<4 to AARCH64_FUSE_CMP_BRANCH.

I've given it a test on top of my fusion patches.

Ok for trunk together with the rest?

2014-11-14  Andrew Pinski  <apinski@cavium.com>

     * config/aarch64/aarch64.c (AARCH64_FUSE_CMP_BRANCH): New define.
     (thunderx_tunings): Add AARCH64_FUSE_CMP_BRANCH to fuseable_ops.
     (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_CMP_BRANCH.

Comments

Richard Earnshaw Nov. 18, 2014, 10:54 a.m. UTC | #1
On 18/11/14 10:34, Kyrill Tkachov wrote:
> Hi all,
> 
> This is a rebase of Andrews' CMP+BRANCH fusion patch on top of my macro 
> fusion patches.
> I've assigned the number 1<<4 to AARCH64_FUSE_CMP_BRANCH.
> 
> I've given it a test on top of my fusion patches.
> 
> Ok for trunk together with the rest?
> 
> 2014-11-14  Andrew Pinski  <apinski@cavium.com>
> 
>      * config/aarch64/aarch64.c (AARCH64_FUSE_CMP_BRANCH): New define.
>      (thunderx_tunings): Add AARCH64_FUSE_CMP_BRANCH to fuseable_ops.
>      (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_CMP_BRANCH.
> 
> 
> aarch64-cmp-branch.patch
> 
> 
> commit 619f6c056e80f284a834d2b24e6a9c1f933a2dd5
> Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
> Date:   Fri Nov 14 09:16:08 2014 +0000
> 
>     [AArch64][apinski] CMP+branch macro fusion
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 2e63269..d0e52b0 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -308,6 +308,7 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
>  #define AARCH64_FUSE_ADRP_ADD	(1 << 1)
>  #define AARCH64_FUSE_MOVK_MOVK	(1 << 2)
>  #define AARCH64_FUSE_ADRP_LDR	(1 << 3)
> +#define AARCH64_FUSE_CMP_BRANCH	(1 << 4)
>  
>  #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
>  __extension__
> @@ -353,7 +354,7 @@ static const struct tune_params thunderx_tunings =
>    &generic_vector_cost,
>    NAMED_PARAM (memmov_cost, 6),
>    NAMED_PARAM (issue_rate, 2),
> -  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
> +  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH)
>  };
>  
>  /* A processor implementing AArch64.  */
> @@ -10133,6 +10134,18 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
>          }
>      }
>  
> +  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
> +      && any_condjump_p (curr))
> +    {
> +      /* FIXME: this misses some which are considered simple arthematic
> +         instructions for ThunderX.  Simple shifts are missed here.  */
> +      if (get_attr_type (prev) == TYPE_ALUS_SREG
> +          || get_attr_type (prev) == TYPE_ALUS_IMM
> +          || get_attr_type (prev) == TYPE_LOGICS_REG
> +          || get_attr_type (prev) == TYPE_LOGICS_IMM)
> +	return true;
> +    }
> +

Repeatedly calling get_attr_type on the same insn is a bit wasteful,
despite the caching that's done.  It would be better to call it once and
save the value in a variable.

Also, calling that function may cause prev to become the extracted insn.
 Is it safe to assume that the caller(s) of this code understand that
this might have happened, or do we need to re-extract curr before returning?

>    return false;
>  }
>  
>
diff mbox

Patch

commit 619f6c056e80f284a834d2b24e6a9c1f933a2dd5
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Fri Nov 14 09:16:08 2014 +0000

    [AArch64][apinski] CMP+branch macro fusion

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2e63269..d0e52b0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -308,6 +308,7 @@  static const struct cpu_vector_cost cortexa57_vector_cost =
 #define AARCH64_FUSE_ADRP_ADD	(1 << 1)
 #define AARCH64_FUSE_MOVK_MOVK	(1 << 2)
 #define AARCH64_FUSE_ADRP_LDR	(1 << 3)
+#define AARCH64_FUSE_CMP_BRANCH	(1 << 4)
 
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
@@ -353,7 +354,7 @@  static const struct tune_params thunderx_tunings =
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 6),
   NAMED_PARAM (issue_rate, 2),
-  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH)
 };
 
 /* A processor implementing AArch64.  */
@@ -10133,6 +10134,18 @@  aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
         }
     }
 
+  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
+      && any_condjump_p (curr))
+    {
+      /* FIXME: this misses some which are considered simple arthematic
+         instructions for ThunderX.  Simple shifts are missed here.  */
+      if (get_attr_type (prev) == TYPE_ALUS_SREG
+          || get_attr_type (prev) == TYPE_ALUS_IMM
+          || get_attr_type (prev) == TYPE_LOGICS_REG
+          || get_attr_type (prev) == TYPE_LOGICS_IMM)
+	return true;
+    }
+
   return false;
 }