diff mbox

[ARM] Cortex-A5 tuning [2/2] - tweak instruction conditionalisation

Message ID 20110602174155.489ee293@rex.config
State New
Headers show

Commit Message

Julian Brown June 2, 2011, 4:41 p.m. UTC
On Wed, 01 Jun 2011 17:00:30 +0100
Richard Earnshaw <rearnsha@arm.com> wrote:

> 
> On Wed, 2011-06-01 at 16:49 +0100, Julian Brown wrote:
> > This patch tweaks the behaviour of arm_final_prescan_insn when
> > tuning for Cortex-A5 cores, since branches are cheaper than long
> > sequences of conditionalised instructions on those processors. As
> > posted in the previous patch, this provides a measurable increase
> > in performance on a popular embedded benchmark.
> > 
> > (I didn't use the tuning infrastructure for this one, though it
> > could easily be changed to do so, now I come to think of it.)
>
> I would much prefer that this was done through the tuning
> infrastructure.  If one core likes it this way, there's a strong
> chance of another one coming along that has similar preferences.

How does this version look? I've left the size-optimisation case the
same (max_insns_skipped=6), but added a "tunable" integer to the
tune_params structure allowing the speed-optimisation case to be varied
according to the chosen target tuning.

To maintain existing semantics, this means duplicating the "fastmul"
structure for StrongARM so that it keeps its lower limit of 3 (XScale
also used the StrongARM setting, but already has its own tuning structure).

Minimally re-tested. OK to apply?

Thanks,

Julian

ChangeLog

    gcc/
    * config/arm/arm-cores.def (strongarm, strongarm110, strongarm1100)
    (strongarm1110): Use strongarm tuning.
    * config/arm/arm-protos.h (tune_params): Add max_insns_skipped
    field.
    * config/arm/arm.c (arm_strongarm_tune): New.
    (arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune)
    (arm_v6t2_tune, arm_cortex_tune, arm_cortex_a5_tune)
    (arm_cortex_a9_tune, arm_fa726te_tune): Add max_insns_skipped field
    setting, using previous defaults or 1 for Cortex-A5.
    (arm_option_override): Set max_insns_skipped from current tuning.

Comments

Richard Earnshaw June 3, 2011, 9:45 a.m. UTC | #1
On Thu, 2011-06-02 at 17:41 +0100, Julian Brown wrote:
> On Wed, 01 Jun 2011 17:00:30 +0100
> Richard Earnshaw <rearnsha@arm.com> wrote:
> 
> > 
> > On Wed, 2011-06-01 at 16:49 +0100, Julian Brown wrote:
> > > This patch tweaks the behaviour of arm_final_prescan_insn when
> > > tuning for Cortex-A5 cores, since branches are cheaper than long
> > > sequences of conditionalised instructions on those processors. As
> > > posted in the previous patch, this provides a measurable increase
> > > in performance on a popular embedded benchmark.
> > > 
> > > (I didn't use the tuning infrastructure for this one, though it
> > > could easily be changed to do so, now I come to think of it.)
> >
> > I would much prefer that this was done through the tuning
> > infrastructure.  If one core likes it this way, there's a strong
> > chance of another one coming along that has similar preferences.
> 
> How does this version look? I've left the size-optimisation case the
> same (max_insns_skipped=6), but added a "tunable" integer to the
> tune_params structure allowing the speed-optimisation case to be varied
> according to the chosen target tuning.
> 
> To maintain existing semantics, this means duplicating the "fastmul"
> structure for the StrongARM (XScale also used the StrongARM
> setting, but already has its own tuning structure).
> 
> Minimally re-tested. OK to apply?
> 
> Thanks,
> 
> Julian
> 
> ChangeLog
> 
>     gcc/
>     * config/arm/arm-cores.def (strongarm, strongarm110, strongarm1100)
>     (strongarm1110): Use strongarm tuning.
>     * config/arm/arm-protos.h (tune_params): Add max_insns_skipped
>     field.
>     * config/arm/arm.c (arm_strongarm_tune): New.
>     (arm_slowmul_tune, arm_fastmul_tune, arm_xscale_tune, arm_9e_tune)
>     (arm_v6t2_tune, arm_cortex_tune, arm_cortex_a5_tune)
>     (arm_cortex_a9_tune, arm_fa726te_tune): Add max_insns_skipped field
>     setting, using previous defaults or 1 for Cortex-A5.
>     (arm_option_override): Set max_insns_skipped from current tuning.

OK.

R.
diff mbox

Patch

commit 2116062b95b55fc048d54321c8b41a4d83175430
Author: Julian Brown <julian@henry7.codesourcery.com>
Date:   Fri May 27 11:26:57 2011 -0700

    Tune max_insns_skipped for conditionalization for Cortex-A5.

diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index 4ff2324..89697c0 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -70,10 +70,10 @@  ARM_CORE("arm7dmi",       arm7dmi,	3M,	FL_CO_PROC | FL_MODE26, fastmul)
 /* V4 Architecture Processors */
 ARM_CORE("arm8",          arm8,		4,	             FL_MODE26 | FL_LDSCHED, fastmul)
 ARM_CORE("arm810",        arm810,	4,	             FL_MODE26 | FL_LDSCHED, fastmul)
-ARM_CORE("strongarm",     strongarm,	4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
-ARM_CORE("strongarm110",  strongarm110,	4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
-ARM_CORE("strongarm1100", strongarm1100, 4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
-ARM_CORE("strongarm1110", strongarm1110, 4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
+ARM_CORE("strongarm",     strongarm,	4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
+ARM_CORE("strongarm110",  strongarm110,	4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
+ARM_CORE("strongarm1100", strongarm1100, 4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
+ARM_CORE("strongarm1110", strongarm1110, 4,	             FL_MODE26 | FL_LDSCHED | FL_STRONG, strongarm)
 ARM_CORE("fa526",         fa526,        4,                               FL_LDSCHED, fastmul)
 ARM_CORE("fa626",         fa626,        4,                               FL_LDSCHED, fastmul)
 
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index c104d74..67aee46 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -221,6 +221,9 @@  struct tune_params
   bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
   bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
   int constant_limit;
+  /* Maximum number of instructions to conditionalise in
+     arm_final_prescan_insn.  */
+  int max_insns_skipped;
   int num_prefetch_slots;
   int l1_cache_size;
   int l1_cache_line_size;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index cd3f104..8f01202 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -857,6 +857,7 @@  const struct tune_params arm_slowmul_tune =
   arm_slowmul_rtx_costs,
   NULL,
   3,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost
@@ -867,6 +868,21 @@  const struct tune_params arm_fastmul_tune =
   arm_fastmul_rtx_costs,
   NULL,
   1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  true,						/* Prefer constant pool.  */
+  arm_default_branch_cost
+};
+
+/* StrongARM has early execution of branches, so a sequence that is worth
+   skipping is shorter.  Set max_insns_skipped to a lower value.  */
+
+const struct tune_params arm_strongarm_tune =
+{
+  arm_fastmul_rtx_costs,
+  NULL,
+  1,						/* Constant limit.  */
+  3,						/* Max cond insns.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost
@@ -877,6 +893,7 @@  const struct tune_params arm_xscale_tune =
   arm_xscale_rtx_costs,
   xscale_sched_adjust_cost,
   2,						/* Constant limit.  */
+  3,						/* Max cond insns.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost
@@ -887,6 +904,7 @@  const struct tune_params arm_9e_tune =
   arm_9e_rtx_costs,
   NULL,
   1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost
@@ -897,6 +915,7 @@  const struct tune_params arm_v6t2_tune =
   arm_9e_rtx_costs,
   NULL,
   1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   false,					/* Prefer constant pool.  */
   arm_default_branch_cost
@@ -908,16 +927,21 @@  const struct tune_params arm_cortex_tune =
   arm_9e_rtx_costs,
   NULL,
   1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   false,					/* Prefer constant pool.  */
   arm_default_branch_cost
 };
 
+/* Branches can be dual-issued on Cortex-A5, so conditional execution is
+   less appealing.  Set max_insns_skipped to a low value.  */
+
 const struct tune_params arm_cortex_a5_tune =
 {
   arm_9e_rtx_costs,
   NULL,
   1,						/* Constant limit.  */
+  1,						/* Max cond insns.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   false,					/* Prefer constant pool.  */
   arm_cortex_a5_branch_cost
@@ -928,6 +952,7 @@  const struct tune_params arm_cortex_a9_tune =
   arm_9e_rtx_costs,
   cortex_a9_sched_adjust_cost,
   1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
   ARM_PREFETCH_BENEFICIAL(4,32,32),
   false,					/* Prefer constant pool.  */
   arm_default_branch_cost
@@ -938,6 +963,7 @@  const struct tune_params arm_fa726te_tune =
   arm_9e_rtx_costs,
   fa726te_sched_adjust_cost,
   1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost
@@ -1732,12 +1758,7 @@  arm_option_override (void)
       max_insns_skipped = 6;
     }
   else
-    {
-      /* StrongARM has early execution of branches, so a sequence
-         that is worth skipping is shorter.  */
-      if (arm_tune_strongarm)
-        max_insns_skipped = 3;
-    }
+    max_insns_skipped = current_tune->max_insns_skipped;
 
   /* Hot/Cold partitioning is not currently supported, since we can't
      handle literal pool placement in that case.  */