commit 3181b0988eed091c8b1ead7a6381c6f9aee7774e
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Tue Oct 21 10:36:48 2014 +0100
[AArch64] Implement TARGET_MACRO_FUSION
@@ -170,6 +170,7 @@ struct tune_params
const struct cpu_vector_cost *const vec_costs;
const int memmov_cost;
const int issue_rate;
+ const unsigned int fuseable_ops;
};
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
@@ -299,6 +299,9 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
+#define AARCH64_FUSE_NOTHING (0) /* fuseable_ops value: no fusion.  */
+#define AARCH64_FUSE_MOV_MOVK (1 << 0) /* fuseable_ops bit: mov/movk.  */
+
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
@@ -309,7 +312,8 @@ static const struct tune_params generic_tunings =
&generic_regmove_cost,
&generic_vector_cost,
NAMED_PARAM (memmov_cost, 4),
- NAMED_PARAM (issue_rate, 2)
+ NAMED_PARAM (issue_rate, 2),
+ NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
};
static const struct tune_params cortexa53_tunings =
@@ -319,7 +323,8 @@ static const struct tune_params cortexa53_tunings =
&cortexa53_regmove_cost,
&generic_vector_cost,
NAMED_PARAM (memmov_cost, 4),
- NAMED_PARAM (issue_rate, 2)
+ NAMED_PARAM (issue_rate, 2),
+ NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
};
static const struct tune_params cortexa57_tunings =
@@ -329,7 +334,8 @@ static const struct tune_params cortexa57_tunings =
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
NAMED_PARAM (memmov_cost, 4),
- NAMED_PARAM (issue_rate, 3)
+ NAMED_PARAM (issue_rate, 3),
+ NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
};
static const struct tune_params thunderx_tunings =
@@ -339,7 +345,8 @@ static const struct tune_params thunderx_tunings =
&thunderx_regmove_cost,
&generic_vector_cost,
NAMED_PARAM (memmov_cost, 6),
- NAMED_PARAM (issue_rate, 2)
+ NAMED_PARAM (issue_rate, 2),
+ NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
};
/* A processor implementing AArch64. */
@@ -10017,6 +10024,48 @@ aarch64_use_by_pieces_infrastructure_p (unsigned int size,
return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}
+/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true when the tuning
+   parameters in use list at least one kind of fuseable operation.  */
+
+static bool
+aarch64_macro_fusion_p (void)
+{
+  const unsigned int fuse_mask = aarch64_tune_params->fuseable_ops;
+
+  return fuse_mask != AARCH64_FUSE_NOTHING;
+}
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true when PREV and
+   CURR form a pair that the tuning parameters in use ask to have
+   scheduled together as a group, false otherwise.  */
+
+static bool
+aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+{
+  rtx first_set = single_set (prev);
+  rtx second_set = single_set (curr);
+
+  /* Both insns must be single SETs, CURR must not be a conditional jump,
+     and the tuning must enable some form of fusion.  */
+  if (!first_set || !second_set
+      || any_condjump_p (curr)
+      || !aarch64_macro_fusion_p ())
+    return false;
+
+  /* mov imm / movk imm is the only pairing recognised here.  */
+  if (!(aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
+    return false;
+
+  /* CURR has to write a constant into a ZERO_EXTRACT destination ...  */
+  rtx insert_dest = SET_DEST (second_set);
+  if (GET_CODE (insert_dest) != ZERO_EXTRACT)
+    return false;
+
+  /* ... and PREV has to set its destination from a constant as well.  */
+  if (!CONST_INT_P (SET_SRC (second_set))
+      || !CONST_INT_P (SET_SRC (first_set)))
+    return false;
+
+  /* Both destinations must be the same hard or pseudo register.
+     NOTE(review): the width and position operands of the ZERO_EXTRACT
+     are not inspected, so any constant insert into that register
+     qualifies -- confirm this is the intended breadth.  */
+  rtx insert_reg = XEXP (insert_dest, 0);
+  rtx load_reg = SET_DEST (first_set);
+  if (!REG_P (insert_reg) || !REG_P (load_reg))
+    return false;
+
+  return REGNO (insert_reg) == REGNO (load_reg);
+}
+
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost
@@ -10273,6 +10322,12 @@ aarch64_use_by_pieces_infrastructure_p (unsigned int size,
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
aarch64_use_by_pieces_infrastructure_p
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
+/* The pair hook is named aarch_ (no "64"), matching its definition.  */
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-aarch64.h"