diff mbox

[RFC:,2/6,v2] Factor out the comparisons against magic numbers in ifcvt

Message ID 1466524231-17412-2-git-send-email-james.greenhalgh@arm.com
State New
Headers show

Commit Message

James Greenhalgh June 21, 2016, 3:50 p.m. UTC
Hi,

This patch pulls the comparisons between if_info->branch_cost and a magic
number representing an instruction count to a common function. While I'm
doing it, I've documented the instructions that the magic numbers relate
to, and updated them where they were inconsistent.

If our measure of the cost of a branch is now in rtx costs units, we can
get to an estimate for the cost of an expression from the number of
instructions by multiplying through by COSTS_N_INSNS (1).

Alternatively, we could actually construct the cheap sequences and
check the sequence. But in these cases we're expecting to if-convert on
almost all targets, the transforms in this patch are almost universally
a good idea, even for targets with a very powerful branch predictor,
eliminating the branch eliminates a basic block boundary so might be
helpful for scheduling, combine, and other RTL optimizers.

Bootstrapped on x86-64 and aarch64 as part of the full sequence.

OK?

Thanks,
James

---

2016-06-21  James Greenhalgh  <james.greenhalgh@arm.com>

	* ifcvt.c (noce_if_info): New field: max_seq_cost.
	(noce_estimate_conversion_profitable_p): New.
	(noce_try_store_flag_constants): Use it.
	(noce_try_addcc): Likewise.
	(noce_try_store_flag_mask): Likewise.
	(noce_try_cmove): Likewise.
	(noce_try_cmove_arith): Likewise.
	(noce_find_if_block): Record targetm.max_noce_ifcvt_seq_cost.

Comments

Jeff Law July 13, 2016, 9:18 p.m. UTC | #1
On 06/21/2016 09:50 AM, James Greenhalgh wrote:
>
> Hi,
>
> This patch pulls the comparisons between if_info->branch_cost and a magic
> number representing an instruction count to a common function. While I'm
> doing it, I've documented the instructions that the magic numbers relate
> to, and updated them where they were inconsistent.
>
> If our measure of the cost of a branch is now in rtx costs units, we can
> get to an estimate for the cost of an expression from the number of
> instructions by multiplying through by COSTS_N_INSNS (1).
>
> Alternatively, we could actually construct the cheap sequences and
> check the sequence. But in these cases we're expecting to if-convert on
> almost all targets, the transforms in this patch are almost universally
> a good idea, even for targets with a very powerful branch predictor,
> eliminating the branch eliminates a basic block boundary so might be
> helpful for scheduling, combine, and other RTL optimizers.
>
> Bootstrapped on x86-64 and aarch64 as part of the full sequence.
>
> OK?
>
> Thanks,
> James
>
> ---
>
> 2016-06-21  James Greenhalgh  <james.greenhalgh@arm.com>
>
> 	* ifcvt.c (noce_if_info): New field: max_seq_cost.
> 	(noce_estimate_conversion_profitable_p): New.
> 	(noce_try_store_flag_constants): Use it.
> 	(noce_try_addcc): Likewise.
> 	(noce_try_store_flag_mask): Likewise.
> 	(noce_try_cmove): Likewise.
> 	(noce_try_cmove_arith): Likewise.
> 	(noce_find_if_block): Record targetm.max_noce_ifcvt_seq_cost.
>
LGTM.

jeff
diff mbox

Patch

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index fd29516..0b97114 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -814,6 +814,10 @@  struct noce_if_info
   /* Estimated cost of the particular branch instruction.  */
   unsigned int branch_cost;
 
+  /* Maximum permissible cost for the unconditional sequence we should
+     generate to replace this branch.  */
+  unsigned int max_seq_cost;
+
   /* The name of the noce transform that succeeded in if-converting
      this structure.  Used for debugging.  */
   const char *transform_name;
@@ -835,6 +839,17 @@  static int noce_try_minmax (struct noce_if_info *);
 static int noce_try_abs (struct noce_if_info *);
 static int noce_try_sign_mask (struct noce_if_info *);
 
+/* This function is always called when we would expand a number of "cheap"
+   instructions.  Multiply NINSNS by COSTS_N_INSNS (1) to approximate the
+   RTX cost of those cheap instructions.  */
+
+inline static bool
+noce_estimate_conversion_profitable_p (struct noce_if_info *if_info,
+				       unsigned int ninsns)
+{
+  return if_info->max_seq_cost >= ninsns * COSTS_N_INSNS (1);
+}
+
 /* Helper function for noce_try_store_flag*.  */
 
 static rtx
@@ -1320,7 +1335,8 @@  noce_try_store_flag_constants (struct noce_if_info *if_info)
       && (REG_P (XEXP (a, 0))
 	  || (noce_operand_ok (XEXP (a, 0))
 	      && ! reg_overlap_mentioned_p (if_info->x, XEXP (a, 0))))
-      && if_info->branch_cost >= 2)
+      /* We need one instruction, the ADD of the store flag.  */
+      && noce_estimate_conversion_profitable_p (if_info, 1))
     {
       common = XEXP (a, 0);
       a = XEXP (a, 1);
@@ -1393,22 +1409,32 @@  noce_try_store_flag_constants (struct noce_if_info *if_info)
 	  else
 	    gcc_unreachable ();
 	}
+      /* Is this (cond) ? 2^n : 0?  */
       else if (ifalse == 0 && exact_log2 (itrue) >= 0
 	       && (STORE_FLAG_VALUE == 1
-		   || if_info->branch_cost >= 2))
+		   /* We need ASHIFT, IOR.   */
+		   || noce_estimate_conversion_profitable_p (if_info, 2)))
 	normalize = 1;
+      /* Is this (cond) ? 0 : 2^n?  */
       else if (itrue == 0 && exact_log2 (ifalse) >= 0 && can_reverse
-	       && (STORE_FLAG_VALUE == 1 || if_info->branch_cost >= 2))
+	       && (STORE_FLAG_VALUE == 1
+		   /* We need ASHIFT, IOR.  */
+		   || noce_estimate_conversion_profitable_p (if_info, 2)))
 	{
 	  normalize = 1;
 	  reversep = true;
 	}
+      /* Is this (cond) ? -1 : x?  */
       else if (itrue == -1
 	       && (STORE_FLAG_VALUE == -1
-		   || if_info->branch_cost >= 2))
+		   /* Just an IOR.  */
+		   || noce_estimate_conversion_profitable_p (if_info, 1)))
 	normalize = -1;
+      /* Is this (cond) ? x : -1?  */
       else if (ifalse == -1 && can_reverse
-	       && (STORE_FLAG_VALUE == -1 || if_info->branch_cost >= 2))
+	       && (STORE_FLAG_VALUE == -1
+		   /* Just an IOR.  */
+		   || noce_estimate_conversion_profitable_p (if_info, 1)))
 	{
 	  normalize = -1;
 	  reversep = true;
@@ -1564,8 +1590,8 @@  noce_try_addcc (struct noce_if_info *if_info)
 	}
 
       /* If that fails, construct conditional increment or decrement using
-	 setcc.  */
-      if (if_info->branch_cost >= 2
+	 setcc.  We'd only need an ADD/SUB for this.  */
+      if (noce_estimate_conversion_profitable_p (if_info, 1)
 	  && (XEXP (if_info->a, 1) == const1_rtx
 	      || XEXP (if_info->a, 1) == constm1_rtx))
         {
@@ -1621,7 +1647,9 @@  noce_try_store_flag_mask (struct noce_if_info *if_info)
     return FALSE;
 
   reversep = 0;
-  if ((if_info->branch_cost >= 2
+
+  /* One instruction, AND.  */
+  if ((noce_estimate_conversion_profitable_p (if_info, 1)
        || STORE_FLAG_VALUE == -1)
       && ((if_info->a == const0_rtx
 	   && rtx_equal_p (if_info->b, if_info->x))
@@ -1828,8 +1856,11 @@  noce_try_cmove (struct noce_if_info *if_info)
 	 approach.  */
       else if (!targetm.have_conditional_execution ()
 		&& CONST_INT_P (if_info->a) && CONST_INT_P (if_info->b)
-		&& ((if_info->branch_cost >= 2 && STORE_FLAG_VALUE == -1)
-		    || if_info->branch_cost >= 3))
+		/* If STORE_FLAG_VALUE is -1, we need SUB, AND, PLUS.  */
+		&& ((noce_estimate_conversion_profitable_p (if_info, 3)
+		     && STORE_FLAG_VALUE == -1)
+		    /* Otherwise, we need NEG, SUB, AND, PLUS.  */
+		    || noce_estimate_conversion_profitable_p (if_info, 4)))
 	{
 	  machine_mode mode = GET_MODE (if_info->x);
 	  HOST_WIDE_INT ifalse = INTVAL (if_info->a);
@@ -2082,7 +2113,7 @@  noce_try_cmove_arith (struct noce_if_info *if_info)
   if (cse_not_expected
       && MEM_P (a) && MEM_P (b)
       && MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b)
-      && if_info->branch_cost >= 5)
+      && noce_estimate_conversion_profitable_p (if_info, 5))
     {
       machine_mode address_mode = get_address_mode (a);
 
@@ -4041,6 +4072,9 @@  noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge,
   if_info.then_else_reversed = then_else_reversed;
   if_info.branch_cost = BRANCH_COST (optimize_bb_for_speed_p (test_bb),
 				     predictable_edge_p (then_edge));
+  if_info.max_seq_cost
+    = targetm.max_noce_ifcvt_seq_cost (optimize_bb_for_speed_p (test_bb),
+				       then_edge);
 
   /* Do the real work.  */