diff mbox

[AArch64] Implement TARGET_SCHED_MACRO_FUSION_PAIR_P

Message ID 5464CC1A.4050405@arm.com
State New
Headers show

Commit Message

Kyrylo Tkachov Nov. 13, 2014, 3:19 p.m. UTC
On 13/11/14 07:24, Andrew Pinski wrote:
> On Tue, Nov 11, 2014 at 3:55 AM, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
>> Hi all,
>>
>> This is the aarch64 implementation of the macro fusion hook, used to fuse
>> mov+movk instructions together.
>>
>> A new field is declared in the tuning struct and as we add more fuseable ops
>> in the future we will fill in more bits in the fuseable_ops field.
>>
>> Bootstrapped and tested on aarch64-none-linux-gnu.
>>
>> Ok for trunk?
> I have a few comments about this patch as I have a patch which depends
> on this for ThunderX.
>
> Each new function should have a comment before it saying what the function does.
> The checks inside aarch_macro_fusion_pair_p for single_set and
> any_condjump_p seems a little too restrictive to add more fusion
> without major changes to the code.
> In the fusion case I am adding is about fusing any single cycle
> arithmetic instruction (on ThunderX) that produces flags and the
> branch that consumes it.

Hi Andrew,

Thanks for the feedback, here's an updated patch that should be a bit 
more robust.

How does this look?

Kyrill

2014-11-13  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/aarch64.c: Include tm-constrs.h
     (AARCH64_FUSE_ADRP_ADD): Define.
     (cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
     (cortexa53_tunings): Likewise.
     (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.

> Thanks,
> Andrew
>
>> 2014-11-11  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>      * config/aarch64/aarch64-protos.h (struct tune_params): Add
>>      fuseable_ops field.
>>      * config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops.
>>      (cortexa53_tunings): Likewise.
>>      (cortexa57_tunings): Likewise.
>>      (thunderx_tunings): Likewise.
>>      (aarch64_macro_fusion_p): New function.
>>      (aarch_macro_fusion_pair_p): Likewise.
>>      (TARGET_SCHED_MACRO_FUSION_P): Define.
>>      (TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
>>      (AARCH64_FUSE_MOV_MOVK): Likewise.
>>      (AARCH64_FUSE_NOTHING): Likewise.

Comments

Andrew Pinski Nov. 13, 2014, 10:26 p.m. UTC | #1
On Thu, Nov 13, 2014 at 7:19 AM, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:
>
> On 13/11/14 07:24, Andrew Pinski wrote:
>>
>> On Tue, Nov 11, 2014 at 3:55 AM, Kyrill Tkachov <kyrylo.tkachov@arm.com>
>> wrote:
>>>
>>> Hi all,
>>>
>>> This is the aarch64 implementation of the macro fusion hook, used to fuse
>>> mov+movk instructions together.
>>>
>>> A new field is declared in the tuning struct and as we add more fuseable
>>> ops
>>> in the future we will fill in more bits in the fuseable_ops field.
>>>
>>> Bootstrapped and tested on aarch64-none-linux-gnu.
>>>
>>> Ok for trunk?
>>
>> I have a few comments about this patch as I have a patch which depends
>> on this for ThunderX.
>>
>> Each new function should have a comment before it saying what the function
>> does.
>> The checks inside aarch_macro_fusion_pair_p for single_set and
>> any_condjump_p seems a little too restrictive to add more fusion
>> without major changes to the code.
>> In the fusion case I am adding is about fusing any single cycle
>> arithmetic instruction (on ThunderX) that produces flags and the
>> branch that consumes it.
>
>
> Hi Andrew,
>
> Thanks for the feedback, here's an updated patch that should be a bit more
> robust.
>
> How does this look?

It looks good except I notice you have one bug.  curr_set can be null
when you look at its SET_DEST.
I would check simple_sets_p as part of the if that is checking for
AARCH64_FUSE_MOV_MOVK.

Thanks,
Andrew Pinski


>
> Kyrill
>
> 2014-11-13  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     * config/aarch64/aarch64.c: Include tm-constrs.h
>     (AARCH64_FUSE_ADRP_ADD): Define.
>     (cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
>     (cortexa53_tunings): Likewise.
>     (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.
>
>
>> Thanks,
>> Andrew
>>
>>> 2014-11-11  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>>
>>>      * config/aarch64/aarch64-protos.h (struct tune_params): Add
>>>      fuseable_ops field.
>>>      * config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops.
>>>      (cortexa53_tunings): Likewise.
>>>      (cortexa57_tunings): Likewise.
>>>      (thunderx_tunings): Likewise.
>>>      (aarch64_macro_fusion_p): New function.
>>>      (aarch_macro_fusion_pair_p): Likewise.
>>>      (TARGET_SCHED_MACRO_FUSION_P): Define.
>>>      (TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
>>>      (AARCH64_FUSE_MOV_MOVK): Likewise.
>>>      (AARCH64_FUSE_NOTHING): Likewise.
diff mbox

Patch

commit 94f8c07830b50e612ea1378fab0ed8363a418826
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Tue Oct 21 10:36:48 2014 +0100

    [AArch64] Implement TARGET_MACRO_FUSION

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 470b9eb..9e0ff8c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -170,6 +170,7 @@  struct tune_params
   const struct cpu_vector_cost *const vec_costs;
   const int memmov_cost;
   const int issue_rate;
+  const unsigned int fuseable_ops;
 };
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 0429d96..7121a05 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -302,6 +302,9 @@  static const struct cpu_vector_cost cortexa57_vector_cost =
   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 };
 
+#define AARCH64_FUSE_NOTHING	(0)
+#define AARCH64_FUSE_MOV_MOVK	(1 << 0)
+
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
@@ -312,7 +315,8 @@  static const struct tune_params generic_tunings =
   &generic_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
 };
 
 static const struct tune_params cortexa53_tunings =
@@ -322,7 +326,8 @@  static const struct tune_params cortexa53_tunings =
   &cortexa53_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -332,7 +337,8 @@  static const struct tune_params cortexa57_tunings =
   &cortexa57_regmove_cost,
   &cortexa57_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 3)
+  NAMED_PARAM (issue_rate, 3),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
 };
 
 static const struct tune_params thunderx_tunings =
@@ -342,7 +348,8 @@  static const struct tune_params thunderx_tunings =
   &thunderx_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 6),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
 };
 
 /* A processor implementing AArch64.  */
@@ -9968,6 +9975,58 @@  aarch64_use_by_pieces_infrastructure_p (unsigned int size,
   return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
 }
 
+/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
+   instruction fusion of some sort.  */
+
+static bool
+aarch64_macro_fusion_p (void)
+{
+  return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
+}
+
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
+   should be kept together during scheduling.  */
+
+static bool
+aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+{
+  rtx set_dest;
+  rtx prev_set = single_set (prev);
+  rtx curr_set = single_set (curr);
+  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
+  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+
+  if (!aarch64_macro_fusion_p ())
+    return false;
+
+  if (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK)
+    {
+      /* We are trying to match:
+         prev (mov)  == (set (reg r0) (const_int imm16))
+         curr (movk) == (set (zero_extract (reg r0)
+                                           (const_int 16)
+                                           (const_int 16))
+                             (const_int imm16_1))  */
+
+      set_dest = SET_DEST (curr_set);
+
+      if (simple_sets_p && GET_CODE (set_dest) == ZERO_EXTRACT
+          && CONST_INT_P (SET_SRC (curr_set))
+          && CONST_INT_P (SET_SRC (prev_set))
+          && CONST_INT_P (XEXP (set_dest, 2))
+          && INTVAL (XEXP (set_dest, 2)) == 16
+          && REG_P (XEXP (set_dest, 0))
+          && REG_P (SET_DEST (prev_set))
+          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
+        {
+          return true;
+        }
+    }
+
+  return false;
+}
+
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST aarch64_address_cost
 
@@ -10224,6 +10283,12 @@  aarch64_use_by_pieces_infrastructure_p (unsigned int size,
 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
   aarch64_use_by_pieces_infrastructure_p
 
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
+
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-aarch64.h"