Patchwork [ARM] Define MAX_CONDITIONAL_EXECUTE

login
register
mail settings
Submitter Greta Yorsh
Date June 17, 2013, 11:18 a.m.
Message ID <000001ce6b4c$752923c0$5f7b6b40$@yorsh@arm.com>
Download mbox | patch
Permalink /patch/251817/
State New
Headers show

Comments

Greta Yorsh - June 17, 2013, 11:18 a.m.
This patch makes the following changes:
* Define MAX_CONDITIONAL_EXECUTE in the arm backend using max_insns_skipped,
which is set based on the current tune.
* Update max_insns_skipped for the Cortex-A15 tune to be 2 (instead of 5).
* Use max_insns_skipped in thumb2_final_prescan_insn to decide when to
combine IT blocks into larger IT blocks. Previously, max_insns_skipped was
only used in arm_final_prescan_insn to decide when a branch should be
converted to conditional execution.

No regression on qemu for arm-none-eabi with cortex-a15 arm/thumb mode.
Bootstrap successful on Cortex-A15. 

Performance improvement on Cortex-A15 in both arm and thumb states on both
Dhrystone and Coremark, and improvement on Spec2000 in thumb state, with all
benchmarks showing improvements except three benchmarks in CFP2000 that have
slight regressions (189,183,178).

gcc/ChangeLog

2013-06-17  Greta Yorsh  <Greta.Yorsh@arm.com>

	* config/arm/arm.h (MAX_CONDITIONAL_EXECUTE): Define macro.
	* config/arm/arm-protos.h (arm_max_conditional_execute): New
	declaration.
	(tune_params): Update comment.
	* config/arm/arm.c (arm_cortex_a15_tune): Set max_cond_insns to 2.
	(arm_max_conditional_execute): New function.
	(thumb2_final_prescan_insn): Use max_insns_skipped and
	MAX_INSN_PER_IT_BLOCK to compute maximum instructions in a block.
Greta Yorsh - June 24, 2013, 9:58 a.m.
PING...
http://gcc.gnu.org/ml/gcc-patches/2013-06/msg00948.html

Thanks,
Greta

> -----Original Message-----
> From: Greta Yorsh [mailto:greta.yorsh@arm.com]
> Sent: 17 June 2013 12:19
> To: GCC Patches
> Cc: Richard Earnshaw; Ramana Radhakrishnan; paul@codesourcery.com;
> nickc@redhat.com
> Subject: [PATCH,ARM] Define MAX_CONDITIONAL_EXECUTE
> 
> This patch makes the following changes:
> * Define MAX_CONDITIONAL_EXECUTE in arm backend using
> max_insns_skipped,
> which is set based on the current tune.
> * Update max_insns_skipped for Cortex-A15 tune to be 2 (instead of 5).
> * Use max_insns_skipped in thumb2_final_prescan_insn to decide when to
> combine IT blocks
> into larger IT blocks. Previously, max_insns_skipped was only used in
> arm_final_prescan_insn to decide when branch should be converted to
> conditional execution.
> 
> No regression on qemu for arm-none-eabi with cortex-a15 arm/thumb mode.
> Bootstrap successful on Cortex-A15.
> 
> Performance improvement on Cortex-A15 in both arm and thumb states on
> both
> Dhrystone and Coremark, and improvement on Spec2000 in thumb state,
> with all
> benchmarks showing improvements except three benchmarks in CFP2000 that
> have
> slight regressions (189,183,178).
> 
> gcc/ChangeLog
> 
> 2013-06-17  Greta Yorsh  <Greta.Yorsh@arm.com>
> 
> 	* config/arm/arm.h (MAX_CONDITIONAL_EXECUTE): Define macro.
> 	* config/arm/arm-protos.h (arm_max_conditional_execute): New
> 	declaration.
> 	(tune_params): Update comment.
> 	* config/arm/arm.c (arm_cortex_a15_tune): Set max_cond_insns to
> 2.
> 	(arm_max_conditional_execute): New function.
> 	(thumb2_final_prescan_insn): Use max_insn_skipped and
> 	MAX_INSN_PER_IT_BLOCK to compute maximum instructions in a block.

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index c791341..374c364 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -227,6 +227,8 @@  extern const char *arm_mangle_type (const_tree);
 
 extern void arm_order_regs_for_local_alloc (void);
 
+extern int arm_max_conditional_execute (void);
+
 /* Vectorizer cost model implementation.  */
 struct cpu_vec_costs {
   const int scalar_stmt_cost;   /* Cost of any scalar operation, excluding
@@ -256,8 +258,7 @@  struct tune_params
   bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
   bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
   int constant_limit;
-  /* Maximum number of instructions to conditionalise in
-     arm_final_prescan_insn.  */
+  /* Maximum number of instructions to conditionalise.  */
   int max_insns_skipped;
   int num_prefetch_slots;
   int l1_cache_size;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 43dfe27..6ca81eb 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1054,7 +1057,7 @@  const struct tune_params arm_cortex_a15_tune =
   arm_9e_rtx_costs,
   NULL,
   1,						/* Constant limit.  */
-  5,						/* Max cond insns.  */
+  2,						/* Max cond insns.  */
   ARM_PREFETCH_NOT_BENEFICIAL,
   false,					/* Prefer constant pool.  */
   arm_default_branch_cost,
@@ -9101,6 +9104,12 @@  arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
   return cost;
 }
 
+int
+arm_max_conditional_execute (void)
+{
+  return max_insns_skipped;
+}
+
 static int
 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
 {
@@ -19488,6 +19497,13 @@  thumb2_final_prescan_insn (rtx insn)
   enum arm_cond_code code;
   int n;
   int mask;
+  int max;
+
+  /* Maximum number of conditionally executed instructions in a block
+     is minimum of the two max values: maximum allowed in an IT block
+     and maximum that is beneficial according to the cost model and tune.  */
+  max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
+    max_insns_skipped : MAX_INSN_PER_IT_BLOCK;
 
   /* Remove the previous insn from the count of insns to be output.  */
   if (arm_condexec_count)
@@ -19530,9 +19546,9 @@  thumb2_final_prescan_insn (rtx insn)
       /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
       if (GET_CODE (body) != COND_EXEC)
 	break;
-      /* Allow up to 4 conditionally executed instructions in a block.  */
+      /* Maximum number of conditionally executed instructions in a block.  */
       n = get_attr_ce_count (insn);
-      if (arm_condexec_masklen + n > MAX_INSN_PER_IT_BLOCK)
+      if (arm_condexec_masklen + n > max)
 	break;
 
       predicate = COND_EXEC_TEST (body);
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 3a49a90..387d271 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -183,6 +183,11 @@  extern arm_cc arm_current_cc;
 
 #define ARM_INVERSE_CONDITION_CODE(X)  ((arm_cc) (((int)X) ^ 1))
 
+/* The maximum number of instructions that it is beneficial to
+   conditionally execute.  */
+#undef MAX_CONDITIONAL_EXECUTE
+#define MAX_CONDITIONAL_EXECUTE arm_max_conditional_execute ()
+
 extern int arm_target_label;
 extern int arm_ccfsm_state;
 extern GTY(()) rtx arm_target_insn;