diff mbox

[AArch64,3/5] Implement fusion of MOVK+MOVK

Message ID 546B208D.3010300@arm.com
State New
Headers show

Commit Message

Kyrylo Tkachov Nov. 18, 2014, 10:33 a.m. UTC
Hi all,

Following up on the previous instruction fusion patches, this one
implements the fusion of instruction pairs of the form:
movk Xn, imm16_1 lsl 32
movk Xn, imm16_2 lsl 48

which are usually generated as part of the immediate synthesis code. For
some cores we don't want the scheduler to move them apart.
These insns are represented in RTL as a SET to a ZERO_EXTRACT, so we
match that case.

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk?

Thanks,
Kyrill

2014-11-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/aarch64.c (AARCH64_FUSE_MOVK_MOVK): Define.
     (cortexa53_tunings): Specify AARCH64_FUSE_MOVK_MOVK in fuseable_ops.
     (cortexa57_tunings): Likewise.
     (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_MOVK_MOVK.

Comments

Marcus Shawcroft Nov. 21, 2014, 4:56 p.m. UTC | #1
On 18 November 2014 10:33, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:

> 2014-11-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     * config/aarch64/aarch64.c (AARCH64_FUSE_MOVK_MOVK): Define.
>     (cortexa53_tunings): Specify AARCH64_FUSE_MOVK_MOVK in fuseable_ops.
>     (cortexa57_tunings): Likewise.
>     (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_MOVK_MOVK.

OK /Marcus
diff mbox

Patch

commit 161e7901d387fa2daf0ea34dd5df4703916435e0
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Wed Nov 12 17:36:56 2014 +0000

    [AArch64] Implement fusion of MOVK+MOVK

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 1eb066c..c3c29ed 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -306,6 +306,7 @@  static const struct cpu_vector_cost cortexa57_vector_cost =
 #define AARCH64_FUSE_NOTHING	(0)
 #define AARCH64_FUSE_MOV_MOVK	(1 << 0)
 #define AARCH64_FUSE_ADRP_ADD	(1 << 1)
+#define AARCH64_FUSE_MOVK_MOVK	(1 << 2)
 
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
@@ -329,7 +330,7 @@  static const struct tune_params cortexa53_tunings =
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 2),
-  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK))
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -340,7 +341,7 @@  static const struct tune_params cortexa57_tunings =
   &cortexa57_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 3),
-  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK))
 };
 
 static const struct tune_params thunderx_tunings =
@@ -10430,6 +10431,36 @@  aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
         }
     }
 
+  if (simple_sets_p
+      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
+    {
+
+      /* We're trying to match:
+         prev (movk) == (set (zero_extract (reg r0)
+                                           (const_int 16)
+                                           (const_int 32))
+                             (const_int imm16_1))
+         curr (movk) == (set (zero_extract (reg r0)
+                                           (const_int 16)
+                                           (const_int 48))
+                             (const_int imm16_2))  */
+
+      if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
+          && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
+          && REG_P (XEXP (SET_DEST (prev_set), 0))
+          && REG_P (XEXP (SET_DEST (curr_set), 0))
+          && REGNO (XEXP (SET_DEST (prev_set), 0))
+             == REGNO (XEXP (SET_DEST (curr_set), 0))
+          && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
+          && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
+          && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
+          && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
+          && CONST_INT_P (SET_SRC (prev_set))
+          && CONST_INT_P (SET_SRC (curr_set)))
+        return true;
+
+    }
+
   return false;
 }