diff mbox

[4/4,v2,AArch64] Cost CCMP instruction sequences to choose better expand order

Message ID 000701d12166$cec93a50$6c5baef0$@arm.com
State New
Headers show

Commit Message

Wilco Dijkstra Nov. 17, 2015, 6:36 p.m. UTC
(v2 cleans up enum use)

This patch adds CCMP selection based on rtx costs: both expansion orders are
generated and the cheaper sequence is chosen. It is based on Jiong's
already-approved patch
https://gcc.gnu.org/ml/gcc-patches/2015-09/msg01434.html, with some minor
refactoring, an enum cleanup, and updated tests.

OK for commit?

ChangeLog:
2015-11-13  Jiong Wang  <jiong.wang@arm.com>
2015-11-18  Wilco Dijkstra  <wdijkstr@arm.com>

gcc/
	* ccmp.c (expand_ccmp_expr_1): Cost the instruction sequences
	generated from the different expansion orders.  Clean up enum use.
  
gcc/testsuite/
	* gcc.target/aarch64/ccmp_1.c: Update test.


---
 gcc/ccmp.c                                | 65 ++++++++++++++++++++++++-------
 gcc/testsuite/gcc.target/aarch64/ccmp_1.c | 15 ++++++-
 2 files changed, 64 insertions(+), 16 deletions(-)
diff mbox

Patch

diff --git a/gcc/ccmp.c b/gcc/ccmp.c
index 3698a7d..0c677fd 100644
--- a/gcc/ccmp.c
+++ b/gcc/ccmp.c
@@ -51,6 +51,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-outof-ssa.h"
 #include "cfgexpand.h"
 #include "ccmp.h"
+#include "predict.h"
 
 /* The following functions expand conditional compare (CCMP) instructions.
    Here is a short description about the over all algorithm:
@@ -88,7 +89,7 @@  ccmp_candidate_p (gimple *g)
   tree rhs = gimple_assign_rhs_to_tree (g);
   tree lhs, op0, op1;
   gimple *gs0, *gs1;
-  enum tree_code tcode, tcode0, tcode1;
+  tree_code tcode, tcode0, tcode1;
   tcode = TREE_CODE (rhs);
 
   if (tcode != BIT_AND_EXPR && tcode != BIT_IOR_EXPR)
@@ -135,10 +136,10 @@  ccmp_candidate_p (gimple *g)
    PREP_SEQ returns all insns to prepare opearands for compare.
    GEN_SEQ returns all compare insns.  */
 static rtx
-expand_ccmp_next (gimple *g, enum tree_code code, rtx prev,
+expand_ccmp_next (gimple *g, tree_code code, rtx prev,
 		  rtx *prep_seq, rtx *gen_seq)
 {
-  enum rtx_code rcode;
+  rtx_code rcode;
   int unsignedp = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)));
 
   gcc_assert (code == BIT_AND_EXPR || code == BIT_IOR_EXPR);
@@ -165,13 +166,15 @@  expand_ccmp_next (gimple *g, enum tree_code code, rtx prev,
 static rtx
 expand_ccmp_expr_1 (gimple *g, rtx *prep_seq, rtx *gen_seq)
 {
+  rtx prep_seq_1, gen_seq_1;
+  rtx prep_seq_2, gen_seq_2;
   tree exp = gimple_assign_rhs_to_tree (g);
-  enum tree_code code = TREE_CODE (exp);
+  tree_code code = TREE_CODE (exp);
   gimple *gs0 = get_gimple_for_ssa_name (TREE_OPERAND (exp, 0));
   gimple *gs1 = get_gimple_for_ssa_name (TREE_OPERAND (exp, 1));
   rtx tmp;
-  enum tree_code code0 = gimple_assign_rhs_code (gs0);
-  enum tree_code code1 = gimple_assign_rhs_code (gs1);
+  tree_code code0 = gimple_assign_rhs_code (gs0);
+  tree_code code1 = gimple_assign_rhs_code (gs1);
 
   gcc_assert (code == BIT_AND_EXPR || code == BIT_IOR_EXPR);
   gcc_assert (gs0 && gs1 && is_gimple_assign (gs0) && is_gimple_assign
(gs1));
@@ -180,19 +183,53 @@  expand_ccmp_expr_1 (gimple *g, rtx *prep_seq, rtx *gen_seq)
     {
       if (TREE_CODE_CLASS (code1) == tcc_comparison)
 	{
-	  int unsignedp0;
-	  enum rtx_code rcode0;
+	  int unsignedp0, unsignedp1;
+	  rtx_code rcode0, rcode1;
+	  int speed_p = optimize_insn_for_speed_p ();
+	  rtx tmp2, ret, ret2;
+	  unsigned cost1 = MAX_COST;
+	  unsigned cost2 = MAX_COST;
 
 	  unsignedp0 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs0)));
+	  unsignedp1 = TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (gs1)));
 	  rcode0 = get_rtx_code (code0, unsignedp0);
+	  rcode1 = get_rtx_code (code1, unsignedp1);
 
-	  tmp = targetm.gen_ccmp_first (prep_seq, gen_seq, rcode0,
+	  tmp = targetm.gen_ccmp_first (&prep_seq_1, &gen_seq_1, rcode0,
 					gimple_assign_rhs1 (gs0),
 					gimple_assign_rhs2 (gs0));
-	  if (!tmp)
+
+	  tmp2 = targetm.gen_ccmp_first (&prep_seq_2, &gen_seq_2, rcode1,
+					 gimple_assign_rhs1 (gs1),
+					 gimple_assign_rhs2 (gs1));
+
+	  if (!tmp && !tmp2)
 	    return NULL_RTX;
 
-	  return expand_ccmp_next (gs1, code, tmp, prep_seq, gen_seq);
+	  if (tmp != NULL)
+	    {
+	      ret = expand_ccmp_next (gs1, code, tmp, &prep_seq_1,
&gen_seq_1);
+	      cost1 = seq_cost (safe_as_a <rtx_insn *> (prep_seq_1),
speed_p);
+	      cost1 += seq_cost (safe_as_a <rtx_insn *> (gen_seq_1),
speed_p);
+	    }
+	  if (tmp2 != NULL)
+	    {
+	      ret2 = expand_ccmp_next (gs0, code, tmp2, &prep_seq_2,
+				       &gen_seq_2);
+	      cost2 = seq_cost (safe_as_a <rtx_insn *> (prep_seq_2),
speed_p);
+	      cost2 += seq_cost (safe_as_a <rtx_insn *> (gen_seq_2),
speed_p);
+	    }
+
+	  if (cost2 < cost1)
+	    {
+	      *prep_seq = prep_seq_2;
+	      *gen_seq = gen_seq_2;
+	      return ret2;
+	    }
+
+	  *prep_seq = prep_seq_1;
+	  *gen_seq = gen_seq_1;
+	  return ret;
 	}
       else
 	{
@@ -246,8 +283,8 @@  expand_ccmp_expr (gimple *g)
 
   if (tmp)
     {
-      enum insn_code icode;
-      enum machine_mode cc_mode = CCmode;
+      insn_code icode;
+      machine_mode cc_mode = CCmode;
       tree lhs = gimple_assign_lhs (g);
       rtx_code cmp_code = GET_CODE (tmp);
 
@@ -257,7 +294,7 @@  expand_ccmp_expr (gimple *g)
       icode = optab_handler (cstore_optab, cc_mode);
       if (icode != CODE_FOR_nothing)
 	{
-	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+	  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
 	  rtx target = gen_reg_rtx (mode);
 
 	  emit_insn (prep_seq);
diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
index ef077e0..7c39b61 100644
--- a/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/ccmp_1.c
@@ -80,5 +80,16 @@  f13 (int a, int b)
   return a == 3 || a == 0;
 }
 
-/* { dg-final { scan-assembler "fccmp\t" } } */
-/* { dg-final { scan-assembler "fccmpe\t" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+32" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+33" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+34" } } */
+/* { dg-final { scan-assembler "cmp\t(.)+35" } } */
+
+/* { dg-final { scan-assembler-times "\tcmp\tw\[0-9\]+, 0" 4 } } */
+/* { dg-final { scan-assembler-times "fcmpe\t(.)+0\\.0" 2 } } */
+/* { dg-final { scan-assembler-times "fcmp\t(.)+0\\.0" 2 } } */
+
+/* { dg-final { scan-assembler "adds\t" } } */
+/* { dg-final { scan-assembler-times "\tccmp\t" 11 } } */
+/* { dg-final { scan-assembler-times "fccmp\t.*0\\.0" 1 } } */
+/* { dg-final { scan-assembler-times "fccmpe\t.*0\\.0" 1 } } */