diff mbox

[1/3,AARCH64] Add macro fusion support for cmp/b.X for ThunderX

Message ID 1415926574-3080-2-git-send-email-apinski@cavium.com
State New
Headers show

Commit Message

Andrew Pinski Nov. 14, 2014, 12:56 a.m. UTC
In ThunderX, any 1 cycle arthemantic instruction that produces the flags
register, will be fused with a branch.  This patch depends on
https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01508.html.
Note I know bit 1 is going is already going to be used and that is why I
proposed this being bit 2.

Build and tested for aarch64-elf with no regressions.

ChangeLog:
* config/aarch64/aarch64.c (AARCH64_FUSE_CMP_BRANCH): New define.
(thunderx_tunings): Add AARCH64_FUSE_CMP_BRANCH to fuseable_ops.
(aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_CMP_BRANCH.
---
 gcc/config/aarch64/aarch64.c |   15 ++++++++++++++-
 1 files changed, 14 insertions(+), 1 deletions(-)

Comments

Kyrylo Tkachov Nov. 14, 2014, 9:08 a.m. UTC | #1
Hi Andrew,

On 14/11/14 00:56, Andrew Pinski wrote:
> In ThunderX, any 1 cycle arthemantic instruction that produces the flags
> register, will be fused with a branch.  This patch depends on
> https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01508.html.
> Note I know bit 1 is going is already going to be used and that is why I
> proposed this being bit 2.
>
> Build and tested for aarch64-elf with no regressions.
>
> ChangeLog:
> * config/aarch64/aarch64.c (AARCH64_FUSE_CMP_BRANCH): New define.
> (thunderx_tunings): Add AARCH64_FUSE_CMP_BRANCH to fuseable_ops.
> (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_CMP_BRANCH.
> ---
>   gcc/config/aarch64/aarch64.c |   15 ++++++++++++++-
>   1 files changed, 14 insertions(+), 1 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index a258f40..5216ac0 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -304,6 +304,7 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
>   
>   #define AARCH64_FUSE_NOTHING	(0)
>   #define AARCH64_FUSE_MOV_MOVK	(1 << 0)
> +#define AARCH64_FUSE_CMP_BRANCH	(1 << 2)
>   
>   #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
>   __extension__
> @@ -349,7 +350,7 @@ static const struct tune_params thunderx_tunings =
>     &generic_vector_cost,
>     NAMED_PARAM (memmov_cost, 6),
>     NAMED_PARAM (issue_rate, 2),
> -  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
> +  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH)
>   };
>   
>   /* A processor implementing AArch64.  */
> @@ -10036,6 +10037,18 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
>           }
>       }
>   
> +  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
> +      && any_condjump_p (curr))
> +    {
> +      /* FIXME: this misses some which is considered simple arthematic
> +         instructions for ThunderX.  Simple shifts are missed here.  */
s/is/are

> +      if (get_attr_type (prev) == TYPE_ALUS_SREG
> +          || get_attr_type (prev) == TYPE_ALUS_IMM
> +          || get_attr_type (prev) == TYPE_LOGICS_REG
> +          || get_attr_type (prev) == TYPE_LOGICS_IMM)
> +	return true;

IIRC the get_attr_* functions can call recog_memoized on prev which can 
potentially change
the recog_data for the insn, sometimes resulting in corruption. Is this 
definitely safe to do?

Kyrill

> +    }
> +
>     return false;
>   }
>
Andrew Pinski Nov. 14, 2014, 9:50 a.m. UTC | #2
On Fri, Nov 14, 2014 at 1:08 AM, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
> Hi Andrew,
>
>
> On 14/11/14 00:56, Andrew Pinski wrote:
>>
>> In ThunderX, any 1 cycle arthemantic instruction that produces the flags
>> register, will be fused with a branch.  This patch depends on
>> https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01508.html.
>> Note I know bit 1 is going is already going to be used and that is why I
>> proposed this being bit 2.
>>
>> Build and tested for aarch64-elf with no regressions.
>>
>> ChangeLog:
>> * config/aarch64/aarch64.c (AARCH64_FUSE_CMP_BRANCH): New define.
>> (thunderx_tunings): Add AARCH64_FUSE_CMP_BRANCH to fuseable_ops.
>> (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_CMP_BRANCH.
>> ---
>>   gcc/config/aarch64/aarch64.c |   15 ++++++++++++++-
>>   1 files changed, 14 insertions(+), 1 deletions(-)
>>
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index a258f40..5216ac0 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -304,6 +304,7 @@ static const struct cpu_vector_cost
>> cortexa57_vector_cost =
>>     #define AARCH64_FUSE_NOTHING        (0)
>>   #define AARCH64_FUSE_MOV_MOVK (1 << 0)
>> +#define AARCH64_FUSE_CMP_BRANCH        (1 << 2)
>>     #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
>>   __extension__
>> @@ -349,7 +350,7 @@ static const struct tune_params thunderx_tunings =
>>     &generic_vector_cost,
>>     NAMED_PARAM (memmov_cost, 6),
>>     NAMED_PARAM (issue_rate, 2),
>> -  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
>> +  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH)
>>   };
>>     /* A processor implementing AArch64.  */
>> @@ -10036,6 +10037,18 @@ aarch_macro_fusion_pair_p (rtx_insn *prev,
>> rtx_insn *curr)
>>           }
>>       }
>>   +  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
>> +      && any_condjump_p (curr))
>> +    {
>> +      /* FIXME: this misses some which is considered simple arthematic
>> +         instructions for ThunderX.  Simple shifts are missed here.  */
>
> s/is/are
>
>> +      if (get_attr_type (prev) == TYPE_ALUS_SREG
>> +          || get_attr_type (prev) == TYPE_ALUS_IMM
>> +          || get_attr_type (prev) == TYPE_LOGICS_REG
>> +          || get_attr_type (prev) == TYPE_LOGICS_IMM)
>> +       return true;
>
>
> IIRC the get_attr_* functions can call recog_memoized on prev which can
> potentially change
> the recog_data for the insn, sometimes resulting in corruption. Is this
> definitely safe to do?

Safe in this context, yes.  I used the similar pattern as what is done for x86:
In the sched-deps.c before calling this function we have the following
(if before reload):
      extract_insn (insn);

extract_insn already will call recog_memoized.

Thanks,
Andrew

>
> Kyrill
>
>> +    }
>> +
>>     return false;
>>   }
>>
>
>
>
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a258f40..5216ac0 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -304,6 +304,7 @@  static const struct cpu_vector_cost cortexa57_vector_cost =
 
 #define AARCH64_FUSE_NOTHING	(0)
 #define AARCH64_FUSE_MOV_MOVK	(1 << 0)
+#define AARCH64_FUSE_CMP_BRANCH	(1 << 2)
 
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
@@ -349,7 +350,7 @@  static const struct tune_params thunderx_tunings =
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 6),
   NAMED_PARAM (issue_rate, 2),
-  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH)
 };
 
 /* A processor implementing AArch64.  */
@@ -10036,6 +10037,18 @@  aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
         }
     }
 
+  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
+      && any_condjump_p (curr))
+    {
+      /* FIXME: this misses some which is considered simple arthematic
+         instructions for ThunderX.  Simple shifts are missed here.  */
+      if (get_attr_type (prev) == TYPE_ALUS_SREG
+          || get_attr_type (prev) == TYPE_ALUS_IMM
+          || get_attr_type (prev) == TYPE_LOGICS_REG
+          || get_attr_type (prev) == TYPE_LOGICS_IMM)
+	return true;
+    }
+
   return false;
 }