Patchwork patch ping: Add static branch predict heuristic of comparing IV to loop_bound variable

login
register
mail settings
Submitter Dehao Chen
Date May 4, 2012, 1:46 p.m.
Message ID <CAO2gOZWGBEUk3vyQXUhWUy1HhPEap_uag8WzY0xzJfULEe0vdw@mail.gmail.com>
Download mbox | patch
Permalink /patch/156937/
State New
Headers show

Comments

Dehao Chen - May 4, 2012, 1:46 p.m.
Hi, Honza,

Thanks for the prompt response. Attached is the updated patch.

Passed bootstrap and all regression tests.

Thanks,
Dehao
Jakub Jelinek - May 8, 2012, 6:28 p.m.
On Fri, May 04, 2012 at 09:46:22PM +0800, Dehao Chen wrote:
> Thanks for the prompt response. Attached is the updated patch.
> 
> Passed bootstrap and all regression tests.

All the new testcases fail for me, on both x86_64-linux and i686-linux,
apparently because of incorrectly committed patch (each testcase source
contains the test twice).

Also, the ChangeLog entries are missing dot at end of each change
description (New instead of New. etc.).

> Index: gcc/testsuite/gcc.dg/predict-3.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/predict-3.c	(revision 0)
> +++ gcc/testsuite/gcc.dg/predict-3.c	(revision 0)
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
> +
> +extern int global;
> +
> +int bar(int);
> +
> +void foo (int bound)
> +{
> +  int i, ret = 0;
> +  for (i = 0; i <= bound; i++)
> +    {
> +      if (i < bound - 2)
> +	global += bar (i);
> +      if (i <= bound)
> +	global += bar (i);
> +      if (i + 1 < bound)
> +	global += bar (i);
> +      if (i != bound)
> +	global += bar (i);
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "loop iv compare heuristics:
> 100.0%" 4 "profile_estimate"} } */
> +/* { dg-final { cleanup-tree-dump "profile_estimate" } } */

	Jakub

Patch

Index: gcc/testsuite/gcc.dg/predict-3.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-3.c	(revision 0)
+++ gcc/testsuite/gcc.dg/predict-3.c	(revision 0)
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+extern int global;
+
+int bar(int);
+
+void foo (int bound)
+{
+  int i, ret = 0;
+  for (i = 0; i <= bound; i++)
+    {
+      if (i < bound - 2)
+	global += bar (i);
+      if (i <= bound)
+	global += bar (i);
+      if (i + 1 < bound)
+	global += bar (i);
+      if (i != bound)
+	global += bar (i);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "loop iv compare heuristics:
100.0%" 4 "profile_estimate"} } */
+/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/testsuite/gcc.dg/predict-4.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-4.c	(revision 0)
+++ gcc/testsuite/gcc.dg/predict-4.c	(revision 0)
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+extern int global;
+
+int bar(int);
+
+void foo (int bound)
+{
+  int i, ret = 0;
+  for (i = 0; i < 10; i++)
+    {
+      if (i < 5)
+	global += bar (i);
+    }
+}
+
+/* { dg-final { scan-tree-dump "loop iv compare heuristics: 50.0%"
"profile_estimate"} } */
+/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/testsuite/gcc.dg/predict-1.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/predict-1.c	(revision 0)
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+extern int global;
+
+int bar(int);
+
+void foo (int bound)
+{
+  int i, ret = 0;
+  for (i = 0; i < bound; i++)
+    {
+      if (i > bound)
+	global += bar (i);
+      if (i >= bound + 2)
+	global += bar (i);
+      if (i > bound - 2)
+	global += bar (i);
+      if (i + 2 > bound)
+	global += bar (i);
+      if (i == 10)
+	global += bar (i);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "loop iv compare heuristics:
0.0%" 5 "profile_estimate"} } */
+/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/testsuite/gcc.dg/predict-5.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-5.c	(revision 0)
+++ gcc/testsuite/gcc.dg/predict-5.c	(revision 0)
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+extern int global;
+
+int bar (int);
+
+void foo (int base, int bound)
+{
+  int i, ret = 0;
+  for (i = base; i <= bound; i++)
+    {
+      if (i > base)
+	global += bar (i);
+      if (i > base + 1)
+	global += bar (i);
+      if (i >= base + 3)
+	global += bar (i);
+      if (i - 2 >= base)
+	global += bar (i);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "loop iv compare heuristics:
100.0%" 4 "profile_estimate"} } */
+/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/testsuite/gcc.dg/predict-2.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/predict-2.c	(revision 0)
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+extern int global;
+
+int bar(int);
+
+void foo (int base, int bound)
+{
+  int i, ret = 0;
+  for (i = base; i < bound; i++)
+    {
+      if (i > bound * bound)
+	global += bar (i);
+      if (i > bound + 10)
+	global += bar (i);
+      if (i <= bound + 10)
+	global += bar (i);
+      if (i > base + 10)
+	global += bar (i);
+      if (i < base - 10)
+	global += bar (i);
+    }
+}
+
+/* { dg-final { scan-tree-dump-not "loop iv compare heuristics"
"profile_estimate"} } */
+/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/testsuite/gcc.dg/predict-6.c
===================================================================
--- gcc/testsuite/gcc.dg/predict-6.c	(revision 0)
+++ gcc/testsuite/gcc.dg/predict-6.c	(revision 0)
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+extern int global;
+
+int bar (int);
+
+void foo (int base, int bound)
+{
+  int i, ret = 0;
+  for (i = base; i <= bound; i++)
+    {
+      if (i < base)
+	global += bar (i);
+      if (i < base + 1)
+	global += bar (i);
+      if (i <= base + 3)
+	global += bar (i);
+      if (i - 1 < base)
+	global += bar (i);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "loop iv compare heuristics:
0.0%" 4 "profile_estimate"} } */
+/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
Index: gcc/predict.c
===================================================================
--- gcc/predict.c	(revision 187140)
+++ gcc/predict.c	(working copy)
@@ -946,6 +946,355 @@ 
     }
 }

+/* Check if T1 and T2 satisfy the IV_COMPARE condition.
+   Return the SSA_NAME if the condition satisfies, NULL otherwise.
+
+   T1 and T2 should be one of the following cases:
+     1. T1 is SSA_NAME, T2 is NULL
+     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
+     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
+
+static tree
+strips_small_constant (tree t1, tree t2)
+{
+  tree ret = NULL;
+  int value = 0;
+
+  if (!t1)
+    return NULL;
+  else if (TREE_CODE (t1) == SSA_NAME)
+    ret = t1;
+  else if (host_integerp (t1, 0))
+    value = tree_low_cst (t1, 0);
+  else
+    return NULL;
+
+  if (!t2)
+    return ret;
+  else if (host_integerp (t2, 0))
+    value = tree_low_cst (t2, 0);
+  else if (TREE_CODE (t2) == SSA_NAME)
+    {
+      if (ret)
+        return NULL;
+      else
+        ret = t2;
+    }
+
+  if (value <= 4 && value >= -4)
+    return ret;
+  else
+    return NULL;
+}
+
+/* Return the SSA_NAME in T or T's operands.
+   Return NULL if SSA_NAME cannot be found.  */
+
+static tree
+get_base_value (tree t)
+{
+  if (TREE_CODE (t) == SSA_NAME)
+    return t;
+
+  if (!BINARY_CLASS_P (t))
+    return NULL;
+
+  switch (TREE_OPERAND_LENGTH (t))
+    {
+    case 1:
+      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
+    case 2:
+      return strips_small_constant (TREE_OPERAND (t, 0),
+				    TREE_OPERAND (t, 1));
+    default:
+      return NULL;
+    }
+}
+
+/* Check the compare STMT in LOOP. If it compares an induction
+   variable to a loop invariant, return true, and save
+   LOOP_INVARIANT, COMPARE_CODE and LOOP_STEP.
+   Otherwise return false and set LOOP_INVAIANT to NULL.  */
+
+static bool
+is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
+				     tree *loop_invariant,
+				     enum tree_code *compare_code,
+				     int *loop_step,
+				     tree *loop_iv_base)
+{
+  tree op0, op1, bound, base;
+  affine_iv iv0, iv1;
+  enum tree_code code;
+  int step;
+
+  code = gimple_cond_code (stmt);
+  *loop_invariant = NULL;
+
+  switch (code)
+    {
+    case GT_EXPR:
+    case GE_EXPR:
+    case NE_EXPR:
+    case LT_EXPR:
+    case LE_EXPR:
+    case EQ_EXPR:
+      break;
+
+    default:
+      return false;
+    }
+
+  op0 = gimple_cond_lhs (stmt);
+  op1 = gimple_cond_rhs (stmt);
+
+  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
+       || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
+    return false;
+  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
+    return false;
+  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
+    return false;
+  if (TREE_CODE (iv0.step) != INTEGER_CST
+      || TREE_CODE (iv1.step) != INTEGER_CST)
+    return false;
+  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
+      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
+    return false;
+
+  if (integer_zerop (iv0.step))
+    {
+      if (code != NE_EXPR && code != EQ_EXPR)
+	code = invert_tree_comparison (code, false);
+      bound = iv0.base;
+      base = iv1.base;
+      if (host_integerp (iv1.step, 0))
+	step = tree_low_cst (iv1.step, 0);
+      else
+	return false;
+    }
+  else
+    {
+      bound = iv1.base;
+      base = iv0.base;
+      if (host_integerp (iv0.step, 0))
+	step = tree_low_cst (iv0.step, 0);
+      else
+	return false;
+    }
+
+  if (TREE_CODE (bound) != INTEGER_CST)
+    bound = get_base_value (bound);
+  if (!bound)
+    return false;
+  if (TREE_CODE (base) != INTEGER_CST)
+    base = get_base_value (base);
+  if (!base)
+    return false;
+
+  *loop_invariant = bound;
+  *compare_code = code;
+  *loop_step = step;
+  *loop_iv_base = base;
+  return true;
+}
+
+/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */
+
+static bool
+expr_coherent_p (tree t1, tree t2)
+{
+  gimple stmt;
+  tree ssa_name_1 = NULL;
+  tree ssa_name_2 = NULL;
+
+  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
+  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);
+
+  if (t1 == t2)
+    return true;
+
+  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
+    return true;
+  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
+    return false;
+
+  /* Check to see if t1 is expressed/defined with t2.  */
+  stmt = SSA_NAME_DEF_STMT (t1);
+  gcc_assert (stmt != NULL);
+  if (is_gimple_assign (stmt))
+    {
+      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
+      if (ssa_name_1 && ssa_name_1 == t2)
+	return true;
+    }
+
+  /* Check to see if t2 is expressed/defined with t1.  */
+  stmt = SSA_NAME_DEF_STMT (t2);
+  gcc_assert (stmt != NULL);
+  if (is_gimple_assign (stmt))
+    {
+      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
+      if (ssa_name_2 && ssa_name_2 == t1)
+	return true;
+    }
+
+  /* Compare if t1 and t2's def_stmts are identical.  */
+  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
+    return true;
+  else
+    return false;
+}
+
+/* Predict branch probability of BB when BB contains a branch that compares
+   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR. The
+   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.
+
+   E.g.
+     for (int i = 0; i < bound; i++) {
+       if (i < bound - 2)
+	 computation_1();
+       else
+	 computation_2();
+     }
+
+  In this loop, we will predict the branch inside the loop to be taken.  */
+
+static void
+predict_iv_comparison (struct loop *loop, basic_block bb,
+		       tree loop_bound_var,
+		       tree loop_iv_base_var,
+		       enum tree_code loop_bound_code,
+		       int loop_bound_step)
+{
+  gimple stmt;
+  tree compare_var, compare_base;
+  enum tree_code compare_code;
+  int compare_step;
+  edge then_edge;
+  edge_iterator ei;
+
+  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
+      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
+      || predicted_by_p (bb, PRED_LOOP_EXIT))
+    return;
+
+  stmt = last_stmt (bb);
+  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
+    return;
+  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
+					    &compare_code,
+					    &compare_step,
+					    &compare_base))
+    return;
+
+  /* Find the taken edge.  */
+  FOR_EACH_EDGE (then_edge, ei, bb->succs)
+    if (then_edge->flags & EDGE_TRUE_VALUE)
+      break;
+
+  /* When comparing an IV to a loop invariant, NE is more likely to be
+     taken while EQ is more likely to be not-taken.  */
+  if (compare_code == NE_EXPR)
+    {
+      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
+      return;
+    }
+  else if (compare_code == EQ_EXPR)
+    {
+      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
+      return;
+    }
+
+  if (!expr_coherent_p (loop_iv_base_var, compare_base))
+    return;
+
+  /* If loop bound, base and compare bound are all constants, we can
+     calculate the probability directly.  */
+  if (host_integerp (loop_bound_var, 0)
+      && host_integerp (compare_var, 0)
+      && host_integerp (compare_base, 0))
+    {
+      int probability;
+      HOST_WIDE_INT compare_count;
+      HOST_WIDE_INT loop_bound = tree_low_cst (loop_bound_var, 0);
+      HOST_WIDE_INT compare_bound = tree_low_cst (compare_var, 0);
+      HOST_WIDE_INT base = tree_low_cst (compare_base, 0);
+      HOST_WIDE_INT loop_count = (loop_bound - base) / compare_step;
+
+      if ((compare_step > 0)
+          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
+	compare_count = (loop_bound - compare_bound) / compare_step;
+      else
+	compare_count = (compare_bound - base) / compare_step;
+
+      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
+	compare_count ++;
+      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
+	loop_count ++;
+      if (compare_count < 0)
+	compare_count = 0;
+      if (loop_count < 0)
+	loop_count = 0;
+
+      if (loop_count == 0)
+	probability = 0;
+      else if (compare_count > loop_count)
+	probability = REG_BR_PROB_BASE;
+      else
+	probability = (double) REG_BR_PROB_BASE * compare_count / loop_count;
+      predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);
+      return;
+    }
+
+  if (expr_coherent_p (loop_bound_var, compare_var))
+    {
+      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
+	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
+	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
+      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
+	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
+	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
+      else if (loop_bound_code == NE_EXPR)
+	{
+	  /* If the loop backedge condition is "(i != bound)", we do
+	     the comparison based on the step of IV:
+	     * step < 0 : backedge condition is like (i > bound)
+	     * step > 0 : backedge condition is like (i < bound)  */
+	  gcc_assert (loop_bound_step != 0);
+	  if (loop_bound_step > 0
+	      && (compare_code == LT_EXPR
+		  || compare_code == LE_EXPR))
+	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
+	  else if (loop_bound_step < 0
+		   && (compare_code == GT_EXPR
+		       || compare_code == GE_EXPR))
+	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
+	  else
+	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
+	}
+      else
+	/* The branch is predicted not-taken if loop_bound_code is
+	   opposite with compare_code.  */
+	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
+    }
+  else if (expr_coherent_p (loop_iv_base_var, compare_var))
+    {
+      /* For cases like:
+	   for (i = s; i < h; i++)
+	     if (i > s + 2) ....
+	 The branch should be predicted taken.  */
+      if (loop_bound_step > 0
+	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
+	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
+      else if (loop_bound_step < 0
+	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
+	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
+      else
+	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
+    }
+}
+
 /* Predict edge probabilities by exploiting loop structure.  */

 static void
@@ -963,6 +1312,12 @@ 
       VEC (edge, heap) *exits;
       struct tree_niter_desc niter_desc;
       edge ex;
+      struct nb_iter_bound *nb_iter;
+      enum tree_code loop_bound_code = ERROR_MARK;
+      int loop_bound_step = 0;
+      tree loop_bound_var = NULL;
+      tree loop_iv_base = NULL;
+      gimple stmt = NULL;

       exits = get_loop_exit_edges (loop);
       n_exits = VEC_length (edge, exits);
@@ -1010,6 +1365,25 @@ 
 	}
       VEC_free (edge, heap, exits);

+      /* Find information about loop bound variables.  */
+      for (nb_iter = loop->bounds; nb_iter;
+	   nb_iter = nb_iter->next)
+	if (nb_iter->stmt
+	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
+	  {
+	    stmt = nb_iter->stmt;
+	    break;
+	  }
+      if (!stmt && last_stmt (loop->header)
+	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
+	stmt = last_stmt (loop->header);
+      if (stmt)
+	is_comparison_with_loop_invariant_p (stmt, loop,
+					     &loop_bound_var,
+					     &loop_bound_code,
+					     &loop_bound_step,
+					     &loop_iv_base);
+
       bbs = get_loop_body (loop);

       for (j = 0; j < loop->num_nodes; j++)
@@ -1071,6 +1445,10 @@ 
 		    || !flow_bb_inside_loop_p (loop, e->dest))
 		  predict_edge (e, PRED_LOOP_EXIT, probability);
 	    }
+	  if (loop_bound_var)
+	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
+				   loop_bound_code,
+				   loop_bound_step);
 	}

       /* Free basic blocks from get_loop_body.  */
Index: gcc/predict.def
===================================================================
--- gcc/predict.def	(revision 187140)
+++ gcc/predict.def	(working copy)
@@ -116,3 +116,13 @@ 

 /* Branches to a mudflap bounds check are extremely unlikely.  */
 DEF_PREDICTOR (PRED_MUDFLAP, "mudflap check", PROB_VERY_LIKELY, 0)
+
+/* Branches to compare induction variable to a loop bound is
+   extremely likely.  */
+DEF_PREDICTOR (PRED_LOOP_IV_COMPARE_GUESS, "guess loop iv compare",
+	       PROB_VERY_LIKELY, 0)
+
+/* Use number of loop iterations determined by # of iterations analysis
+   to set probability of branches that compares IV to loop bound variable.  */
+DEF_PREDICTOR (PRED_LOOP_IV_COMPARE, "loop iv compare", PROB_VERY_LIKELY,
+	       PRED_FLAG_FIRST_MATCH)