Patchwork [arm] Define LOGICAL_OP_NON_SHORT_CIRCUIT in ARM back end

login
register
mail settings
Submitter Bin Cheng
Date July 26, 2012, 10:21 a.m.
Message ID <001c01cd6b18$6057db10$21079130$@cheng@arm.com>
Download mbox | patch
Permalink /patch/173395/
State New
Headers show

Comments

Bin Cheng - July 26, 2012, 10:21 a.m.
Hi,
This patch defines LOGICAL_OP_NON_SHORT_CIRCUIT in the ARM back end by calling a
new hook function ("logical_op_non_short_circuit") in the tune_params structure.
In most cases the value of the macro is the same as the default version in
fold-const.c, while it is "FALSE" (to prefer short circuit) when optimizing
for size on armv6-m processors. This brings us a ~0.2% code size improvement
on the CSiBE benchmark for cortex-m0.
 
Tuning for other ARM processors could follow later.

No regressions were introduced. Is it OK?
Thanks

2012-07-26  Bin Cheng  <bin.cheng@arm.com>

	* config/arm/arm-cores.def (cortex-m1, cortex-m0, cortex-m0plus):
Use v6m.
	* config/arm/arm-protos.h (tune_params): Add
logical_op_non_short_circuit hook.
	* config/arm/arm.c (arm_default_logical_op_non_short_circuit)
	(arm_v6m_logical_op_non_short_circuit): New functions.
	(arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune,
arm_xscale_tune)
	(arm_9e_tune, arm_v6t2_tune, arm_cortex_tune, arm_cortex_a15_tune)
	(arm_cortex_a5_tune, arm_cortex_a9_tune, arm_fa726te_tune): Set the
field
	logical_op_non_short_circuit to
arm_default_logical_op_non_short_circuit.
	(arm_v6m_tune): New tune_params struct.
	* config/arm/arm.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Use the hook
	logical_op_non_short_circuit from current_tune structure.
Richard Earnshaw - July 26, 2012, 1:15 p.m.
On 26/07/12 11:21, Bin Cheng wrote:
> Hi,
> This patch defines LOGICAL_OP_NON_SHORT_CIRCUIT in arm back-end by calling a
> new hook function(logical_op_non_short_circuit") in tune_params structure.
> For most cases the value of the macro is same as the default version in
> fold-const.c, while it is "FALSE" to prefer short circuit when optimizing
> for size on armv6-m processors. This brings us ~0.2% code size improvement
> for CSiBE benchmark on cortex-m0.
>  
> Also tunes on other ARM processes could be followed.
> 
> No regression introduced, is it OK?
> Thanks
> 

This all looks way too complex.  It shouldn't be necessary to
write a whole load of per-tune method calls to deal with this.  What we
need is a simple parameter in the tune table.

So LOGICAL_OP_NON_SHORT_CIRCUIT should effectively be:

    if (OPTIMIZE_SIZE)
	return ANSWER_BASED_ON_ISA;
    else
	return tune->log_op_non_short_circuit[TARGET_ARM ? 0 : 1];

Where tune->log_op_non_short_circuit[2] is defined for each tuning
table, to cover ARM and Thumb states.

ANSWER_BASED_ON_ISA will do the right thing for Thumb1, Thumb2 and ARM,
depending on which leads to the smallest code.

R.

> 2012-07-26  Bin Cheng  <bin.cheng@arm.com>
> 
> 	* config/arm/arm-cores.def (cortex-m1, cortex-m0, cortex-m0plus):
> Use v6m.
> 	* config/arm/arm-protos.h (tune_params): Add
> logical_op_non_short_circuit hook.
> 	* config/arm/arm.c (arm_default_logical_op_non_short_circuit)
> 	(arm_v6m_logical_op_non_short_circuit): New functions.
> 	(arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune,
> arm_xscale_tune)
> 	(arm_9e_tune, arm_v6t2_tune, arm_cortex_tune, arm_cortex_a15_tune)
> 	(arm_cortex_a5_tune, arm_cortex_a9_tune, arm_fa726te_tune): Set the
> field
> 	logical_op_non_short_circuit to
> arm_default_logical_op_non_short_circuit.
> 	(arm_v6m_tune): New tune_params struct.
> 	* config/arm/arm.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Use the hook
> 	logical_op_non_short_circuit from current_tune structure.
> 
> 
> short-circuit-20120726.txt
> 
> 
> Index: gcc/config/arm/arm.c
> ===================================================================
> --- gcc/config/arm/arm.c	(revision 189835)
> +++ gcc/config/arm/arm.c	(working copy)
> @@ -265,6 +265,9 @@
>  static int arm_default_branch_cost (bool, bool);
>  static int arm_cortex_a5_branch_cost (bool, bool);
>  
> +static bool arm_default_logical_op_non_short_circuit (void);
> +static bool arm_v6m_logical_op_non_short_circuit (void);
> +
>  static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
>  					     const unsigned char *sel);
>  
> @@ -876,7 +879,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,						/* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_fastmul_tune =
> @@ -888,7 +892,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,						/* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  /* StrongARM has early execution of branches, so a sequence that is worth
> @@ -903,7 +908,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,						/* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_xscale_tune =
> @@ -915,7 +921,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,						/* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_9e_tune =
> @@ -927,7 +934,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,						/* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_v6t2_tune =
> @@ -939,7 +947,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    false,					/* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
> @@ -952,7 +961,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    false,					/* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_cortex_a15_tune =
> @@ -964,7 +974,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    false,					/* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  true                                          /* Prefer LDRD/STRD.  */
> +  true,						/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  /* Branches can be dual-issued on Cortex-A5, so conditional execution is
> @@ -979,7 +990,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    false,					/* Prefer constant pool.  */
>    arm_cortex_a5_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  const struct tune_params arm_cortex_a9_tune =
> @@ -991,9 +1003,25 @@
>    ARM_PREFETCH_BENEFICIAL(4,32,32),
>    false,					/* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
> +/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
> +   arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
> +const struct tune_params arm_v6m_tune =
> +{
> +  arm_9e_rtx_costs,
> +  NULL,
> +  1,						/* Constant limit.  */
> +  5,						/* Max cond insns.  */
> +  ARM_PREFETCH_NOT_BENEFICIAL,
> +  false,					/* Prefer constant pool.  */
> +  arm_default_branch_cost,
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_v6m_logical_op_non_short_circuit,
> +};
> +
>  const struct tune_params arm_fa726te_tune =
>  {
>    arm_9e_rtx_costs,
> @@ -1003,7 +1031,8 @@
>    ARM_PREFETCH_NOT_BENEFICIAL,
>    true,						/* Prefer constant pool.  */
>    arm_default_branch_cost,
> -  false                                         /* Prefer LDRD/STRD.  */
> +  false,					/* Prefer LDRD/STRD.  */
> +  arm_default_logical_op_non_short_circuit,
>  };
>  
>  
> @@ -8637,7 +8666,24 @@
>  
>    return cost;
>  }
> + 
> +static bool
> +arm_default_logical_op_non_short_circuit (void)
> +{
> +  return (BRANCH_COST (optimize_function_for_speed_p (cfun),
> +		       false) >= 2);
> +}
>  
> +static bool
> +arm_v6m_logical_op_non_short_circuit (void)
> +{
> +  /* Prefer short circuit operation on armv6-m when optimizing for size.  */
> +  if (optimize_size)
> +    return false;
> +
> +  return arm_default_logical_op_non_short_circuit ();
> +}
> +
>  static int
>  arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
>  {
> Index: gcc/config/arm/arm.h
> ===================================================================
> --- gcc/config/arm/arm.h	(revision 189835)
> +++ gcc/config/arm/arm.h	(working copy)
> @@ -1994,10 +1994,14 @@
>     || (X) == arg_pointer_rtx)
>  
>  /* Try to generate sequences that don't involve branches, we can then use
> -   conditional instructions */
> +   conditional instructions.  */
>  #define BRANCH_COST(speed_p, predictable_p) \
>    (current_tune->branch_cost (speed_p, predictable_p))
>  
> +/* False if short circuit operation is preferred.  */
> +#define LOGICAL_OP_NON_SHORT_CIRCUIT				\
> +  (current_tune->logical_op_non_short_circuit ())
> +
>  
>  /* Position Independent Code.  */
>  /* We decide which register to use based on the compilation options and
> Index: gcc/config/arm/arm-cores.def
> ===================================================================
> --- gcc/config/arm/arm-cores.def	(revision 189835)
> +++ gcc/config/arm/arm-cores.def	(working copy)
> @@ -135,6 +135,6 @@
>  ARM_CORE("cortex-r5",	  cortexr5,	7R,				 FL_LDSCHED | FL_ARM_DIV, cortex)
>  ARM_CORE("cortex-m4",	  cortexm4,	7EM,				 FL_LDSCHED, cortex)
>  ARM_CORE("cortex-m3",	  cortexm3,	7M,				 FL_LDSCHED, cortex)
> -ARM_CORE("cortex-m1",	  cortexm1,	6M,				 FL_LDSCHED, cortex)
> -ARM_CORE("cortex-m0",	  cortexm0,	6M,				 FL_LDSCHED, cortex)
> -ARM_CORE("cortex-m0plus", cortexm0plus,	6M,				 FL_LDSCHED, cortex)
> +ARM_CORE("cortex-m1",	  cortexm1,	6M,				 FL_LDSCHED, v6m)
> +ARM_CORE("cortex-m0",	  cortexm0,	6M,				 FL_LDSCHED, v6m)
> +ARM_CORE("cortex-m0plus", cortexm0plus,	6M,				 FL_LDSCHED, v6m)
> Index: gcc/config/arm/arm-protos.h
> ===================================================================
> --- gcc/config/arm/arm-protos.h	(revision 189835)
> +++ gcc/config/arm/arm-protos.h	(working copy)
> @@ -240,6 +240,7 @@
>    int (*branch_cost) (bool, bool);
>    /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM.  */
>    bool prefer_ldrd_strd;
> +  bool (*logical_op_non_short_circuit) (void);
>  };
>  
>  extern const struct tune_params *current_tune;
>

Patch

Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 189835)
+++ gcc/config/arm/arm.c	(working copy)
@@ -265,6 +265,9 @@ 
 static int arm_default_branch_cost (bool, bool);
 static int arm_cortex_a5_branch_cost (bool, bool);
 
+static bool arm_default_logical_op_non_short_circuit (void);
+static bool arm_v6m_logical_op_non_short_circuit (void);
+
 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 					     const unsigned char *sel);
 
@@ -876,7 +879,8 @@ 
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  false,					/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
 const struct tune_params arm_fastmul_tune =
@@ -888,7 +892,8 @@ 
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  false,					/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
 /* StrongARM has early execution of branches, so a sequence that is worth
@@ -903,7 +908,8 @@ 
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  false,					/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
 const struct tune_params arm_xscale_tune =
@@ -915,7 +921,8 @@ 
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  false,					/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
 const struct tune_params arm_9e_tune =
@@ -927,7 +934,8 @@ 
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  false,					/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
 const struct tune_params arm_v6t2_tune =
@@ -939,7 +947,8 @@ 
   ARM_PREFETCH_NOT_BENEFICIAL,
   false,					/* Prefer constant pool.  */
   arm_default_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  false,					/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
@@ -952,7 +961,8 @@ 
   ARM_PREFETCH_NOT_BENEFICIAL,
   false,					/* Prefer constant pool.  */
   arm_default_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  false,					/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
 const struct tune_params arm_cortex_a15_tune =
@@ -964,7 +974,8 @@ 
   ARM_PREFETCH_NOT_BENEFICIAL,
   false,					/* Prefer constant pool.  */
   arm_default_branch_cost,
-  true                                          /* Prefer LDRD/STRD.  */
+  true,						/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
@@ -979,7 +990,8 @@ 
   ARM_PREFETCH_NOT_BENEFICIAL,
   false,					/* Prefer constant pool.  */
   arm_cortex_a5_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  false,					/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
 const struct tune_params arm_cortex_a9_tune =
@@ -991,9 +1003,25 @@ 
   ARM_PREFETCH_BENEFICIAL(4,32,32),
   false,					/* Prefer constant pool.  */
   arm_default_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  false,					/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
+/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
+   arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus.  */
+const struct tune_params arm_v6m_tune =
+{
+  arm_9e_rtx_costs,
+  NULL,
+  1,						/* Constant limit.  */
+  5,						/* Max cond insns.  */
+  ARM_PREFETCH_NOT_BENEFICIAL,
+  false,					/* Prefer constant pool.  */
+  arm_default_branch_cost,
+  false,					/* Prefer LDRD/STRD.  */
+  arm_v6m_logical_op_non_short_circuit,
+};
+
 const struct tune_params arm_fa726te_tune =
 {
   arm_9e_rtx_costs,
@@ -1003,7 +1031,8 @@ 
   ARM_PREFETCH_NOT_BENEFICIAL,
   true,						/* Prefer constant pool.  */
   arm_default_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  false,					/* Prefer LDRD/STRD.  */
+  arm_default_logical_op_non_short_circuit,
 };
 
 
@@ -8637,7 +8666,24 @@ 
 
   return cost;
 }
+ 
+static bool
+arm_default_logical_op_non_short_circuit (void)
+{
+  return (BRANCH_COST (optimize_function_for_speed_p (cfun),
+		       false) >= 2);
+}
 
+static bool
+arm_v6m_logical_op_non_short_circuit (void)
+{
+  /* Prefer short circuit operation on armv6-m when optimizing for size.  */
+  if (optimize_size)
+    return false;
+
+  return arm_default_logical_op_non_short_circuit ();
+}
+
 static int
 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
 {
Index: gcc/config/arm/arm.h
===================================================================
--- gcc/config/arm/arm.h	(revision 189835)
+++ gcc/config/arm/arm.h	(working copy)
@@ -1994,10 +1994,14 @@ 
    || (X) == arg_pointer_rtx)
 
 /* Try to generate sequences that don't involve branches, we can then use
-   conditional instructions */
+   conditional instructions.  */
 #define BRANCH_COST(speed_p, predictable_p) \
   (current_tune->branch_cost (speed_p, predictable_p))
 
+/* False if short circuit operation is preferred.  */
+#define LOGICAL_OP_NON_SHORT_CIRCUIT				\
+  (current_tune->logical_op_non_short_circuit ())
+
 
 /* Position Independent Code.  */
 /* We decide which register to use based on the compilation options and
Index: gcc/config/arm/arm-cores.def
===================================================================
--- gcc/config/arm/arm-cores.def	(revision 189835)
+++ gcc/config/arm/arm-cores.def	(working copy)
@@ -135,6 +135,6 @@ 
 ARM_CORE("cortex-r5",	  cortexr5,	7R,				 FL_LDSCHED | FL_ARM_DIV, cortex)
 ARM_CORE("cortex-m4",	  cortexm4,	7EM,				 FL_LDSCHED, cortex)
 ARM_CORE("cortex-m3",	  cortexm3,	7M,				 FL_LDSCHED, cortex)
-ARM_CORE("cortex-m1",	  cortexm1,	6M,				 FL_LDSCHED, cortex)
-ARM_CORE("cortex-m0",	  cortexm0,	6M,				 FL_LDSCHED, cortex)
-ARM_CORE("cortex-m0plus", cortexm0plus,	6M,				 FL_LDSCHED, cortex)
+ARM_CORE("cortex-m1",	  cortexm1,	6M,				 FL_LDSCHED, v6m)
+ARM_CORE("cortex-m0",	  cortexm0,	6M,				 FL_LDSCHED, v6m)
+ARM_CORE("cortex-m0plus", cortexm0plus,	6M,				 FL_LDSCHED, v6m)
Index: gcc/config/arm/arm-protos.h
===================================================================
--- gcc/config/arm/arm-protos.h	(revision 189835)
+++ gcc/config/arm/arm-protos.h	(working copy)
@@ -240,6 +240,7 @@ 
   int (*branch_cost) (bool, bool);
   /* Prefer STRD/LDRD instructions over PUSH/POP/LDM/STM.  */
   bool prefer_ldrd_strd;
+  bool (*logical_op_non_short_circuit) (void);
 };
 
 extern const struct tune_params *current_tune;