diff mbox

ICF versus inlining

Message ID 20150129070336.GA4982@kam.mff.cuni.cz
State New
Headers show

Commit Message

Jan Hubicka Jan. 29, 2015, 7:03 a.m. UTC
Hi,
the PR is about a function not being inlined because it is called via a wrapper
introduced by the ICF merging code.  cgraph_node::create_wrapper sets
call_stmt_cannot_inline_p, which I suggested to Martin as a way to figure out
how much of the merging is undone by the inliner.  It was not meant to get into
mainline.

Because the wrappers may end up at performance critical places, I also added
profile maintenance - otherwise all BBs will get a count of 0 and after
inlining we end up with lousy code with -fprofile-use.

To get sane inlining we probably want to make the inliner realize that the call
cost in a thunk is minimal.  I will look into that incrementally.

Bootstrapped/regtested x86_64-linux, will commit shortly.

	PR ipa/64801
	* gcc.dg/tree-ssa/pr64801.c: New testcase.
	* cgraphunit.c (init_lowered_empty_function): Add COUNT parameter;
	make sane BB profile.
	(cgraph_node::expand_thunk): Make sane BB profile.
	(cgraph_node::create_wrapper): Do not set call_stmt_cannot_inline_p.
	* cgraph.h (init_lowered_empty_function): Update prototype.
	* config/i386/i386.c (make_resolver_func): Update call.
	* predict.c (gate): Disable branch prediction pass if
	profile is already there.

Comments

H.J. Lu Jan. 29, 2015, 1:17 p.m. UTC | #1
On Wed, Jan 28, 2015 at 11:03 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> the PR is about function not being inlined because it is called via a wrapper
> introduced by ICF merging code. cgraph_node::create_wrapper set
> call_stmt_cannot_inline_p that I suggested to Martin to try to figure out how
> much of merging is undone by inliner.  It was not meant to get into mainline.
>
> Because the wrappers may end up at performance critical places, I also added
> profile maintenance - otherwise all BBs will get count of 0 and after
> inlining we end up with lousy code with -fprofile-use.
>
> To get sane inlining we probably want to make inliner to realize that call
> cost in thunk is minimal.  I will look into that incrementally.
>
> Bootstrapped/regtested x86_64-linux, will commit shortly.
>
>         PR ipa/64801
>         * gcc.dg/tree-ssa/pr64801.c: New testcase.
>         * cgraphunit.c (init_lowered_empty_function): Add COUNT parameter;
>         make sane BB profile.
>         (cgraph_node::expand_thunk): Make sane BB profile.
>         (cgraph_node::create_wrapper): Do not set call_stmt_cannot_inline_p.
>         * cgraph.h (init_lowered_empty_function): Update prototype.
>         * config/i386/i386.c (make_resolver_func): Update call.
>         * predict.c (gate): Disable branch prediction pass if
>         profile is already there.
>
> Index: testsuite/gcc.dg/tree-ssa/pr64801.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/pr64801.c (revision 0)
> +++ testsuite/gcc.dg/tree-ssa/pr64801.c (revision 0)
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +int a;
> +int
> +elantech_detect (void)
> +{
> +  return -38;
> +}
> +inline int
> +fsp_detect (void)
> +{
> +  return -38;
> +}
> +void
> +psmouse_extensions (void)
> +{
> +  int (*b)() = fsp_detect;
> +  a = b ();
> +}
> +/* { dg-final { scan-tree-dump-not "fsp_detect"} } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */

I got

ERROR: gcc.dg/tree-ssa/pr64801.c: error executing dg-final:
scan-tree-dump-not: too few arguments
diff mbox

Patch

Index: testsuite/gcc.dg/tree-ssa/pr64801.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/pr64801.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/pr64801.c	(revision 0)
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+int a;
+int
+elantech_detect (void)
+{
+  return -38;
+}
+inline int
+fsp_detect (void)
+{
+  return -38;
+}
+void
+psmouse_extensions (void)
+{
+  int (*b)() = fsp_detect;
+  a = b ();
+}
+/* { dg-final { scan-tree-dump-not "fsp_detect"} } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Index: cgraphunit.c
===================================================================
--- cgraphunit.c	(revision 220229)
+++ cgraphunit.c	(working copy)
@@ -1325,9 +1325,10 @@  mark_functions_to_output (void)
    return basic block in the function body.  */
 
 basic_block
-init_lowered_empty_function (tree decl, bool in_ssa)
+init_lowered_empty_function (tree decl, bool in_ssa, gcov_type count)
 {
   basic_block bb;
+  edge e;
 
   current_function_decl = decl;
   allocate_struct_function (decl, false);
@@ -1353,9 +1354,19 @@  init_lowered_empty_function (tree decl,
   loops_for_fn (cfun)->state |= LOOPS_MAY_HAVE_MULTIPLE_LATCHES;
 
   /* Create BB for body of the function and connect it properly.  */
+  ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = count;
+  ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency = REG_BR_PROB_BASE;
+  EXIT_BLOCK_PTR_FOR_FN (cfun)->count = count;
+  EXIT_BLOCK_PTR_FOR_FN (cfun)->frequency = REG_BR_PROB_BASE;
   bb = create_basic_block (NULL, (void *) 0, ENTRY_BLOCK_PTR_FOR_FN (cfun));
-  make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), bb, EDGE_FALLTHRU);
-  make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
+  bb->count = count;
+  bb->frequency = BB_FREQ_MAX;
+  e = make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), bb, EDGE_FALLTHRU);
+  e->count = count;
+  e->probability = REG_BR_PROB_BASE;
+  e = make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
+  e->count = count;
+  e->probability = REG_BR_PROB_BASE;
   add_bb_to_loop (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father);
 
   return bb;
@@ -1578,7 +1589,8 @@  cgraph_node::expand_thunk (bool output_a
       else
 	resdecl = DECL_RESULT (thunk_fndecl);
 
-      bb = then_bb = else_bb = return_bb = init_lowered_empty_function (thunk_fndecl, true);
+      bb = then_bb = else_bb = return_bb
+	= init_lowered_empty_function (thunk_fndecl, true, count);
 
       bsi = gsi_start_bb (bb);
 
@@ -1654,13 +1666,20 @@  cgraph_node::expand_thunk (bool output_a
 	      if (TREE_CODE (TREE_TYPE (restmp)) == POINTER_TYPE)
 		{
 		  gimple stmt;
+		  edge e;
 		  /* If the return type is a pointer, we need to
 		     protect against NULL.  We know there will be an
 		     adjustment, because that's why we're emitting a
 		     thunk.  */
 		  then_bb = create_basic_block (NULL, (void *) 0, bb);
+		  then_bb->count = count - count / 16;
+		  then_bb->frequency = BB_FREQ_MAX - BB_FREQ_MAX / 16;
 		  return_bb = create_basic_block (NULL, (void *) 0, then_bb);
+		  return_bb->count = count;
+		  return_bb->frequency = BB_FREQ_MAX;
 		  else_bb = create_basic_block (NULL, (void *) 0, else_bb);
+		  then_bb->count = count / 16;
+		  then_bb->frequency = BB_FREQ_MAX / 16;
 		  add_bb_to_loop (then_bb, bb->loop_father);
 		  add_bb_to_loop (return_bb, bb->loop_father);
 		  add_bb_to_loop (else_bb, bb->loop_father);
@@ -1670,11 +1689,21 @@  cgraph_node::expand_thunk (bool output_a
 					    build_zero_cst (TREE_TYPE (restmp)),
 					    NULL_TREE, NULL_TREE);
 		  gsi_insert_after (&bsi, stmt, GSI_NEW_STMT);
-		  make_edge (bb, then_bb, EDGE_TRUE_VALUE);
-		  make_edge (bb, else_bb, EDGE_FALSE_VALUE);
-		  make_edge (return_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
-		  make_edge (then_bb, return_bb, EDGE_FALLTHRU);
-		  make_edge (else_bb, return_bb, EDGE_FALLTHRU);
+		  e = make_edge (bb, then_bb, EDGE_TRUE_VALUE);
+		  e->probability = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 16;
+		  e->count = count - count / 16;
+		  e = make_edge (bb, else_bb, EDGE_FALSE_VALUE);
+		  e->probability = REG_BR_PROB_BASE / 16;
+		  e->count = count / 16;
+		  e = make_edge (return_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
+		  e->probability = REG_BR_PROB_BASE;
+		  e->count = count;
+		  e = make_edge (then_bb, return_bb, EDGE_FALLTHRU);
+		  e->probability = REG_BR_PROB_BASE;
+		  e->count = count - count / 16;
+		  e = make_edge (else_bb, return_bb, EDGE_FALLTHRU);
+		  e->probability = REG_BR_PROB_BASE;
+		  e->count = count / 16;
 		  bsi = gsi_last_bb (then_bb);
 		}
 
@@ -1708,6 +1737,8 @@  cgraph_node::expand_thunk (bool output_a
 	}
 
       cfun->gimple_df->in_ssa_p = true;
+      profile_status_for_fn (cfun)
+        = count ? PROFILE_READ : PROFILE_GUESSED;
       /* FIXME: C++ FE should stop setting TREE_ASM_WRITTEN on thunks.  */
       TREE_ASM_WRITTEN (thunk_fndecl) = false;
       delete_unreachable_blocks ();
@@ -2415,8 +2446,7 @@  cgraph_node::create_wrapper (cgraph_node
   definition = true;
   thunk.thunk_p = true;
   thunk.this_adjusting = false;
-
-  cgraph_edge *e = create_edge (target, NULL, 0, CGRAPH_FREQ_BASE);
+  create_edge (target, NULL, count, CGRAPH_FREQ_BASE);
 
   tree arguments = DECL_ARGUMENTS (decl);
 
@@ -2427,7 +2457,6 @@  cgraph_node::create_wrapper (cgraph_node
     }
 
   expand_thunk (false, true);
-  e->call_stmt_cannot_inline_p = true;
 
   /* Inline summary set-up.  */
   analyze ();
Index: cgraph.h
===================================================================
--- cgraph.h	(revision 220229)
+++ cgraph.h	(working copy)
@@ -2194,7 +2194,7 @@  void cgraphunit_c_finalize (void);
 
 /*  Initialize datastructures so DECL is a function in lowered gimple form.
     IN_SSA is true if the gimple is in SSA.  */
-basic_block init_lowered_empty_function (tree, bool);
+basic_block init_lowered_empty_function (tree, bool, gcov_type);
 
 /* In cgraphclones.c  */
 
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 220229)
+++ config/i386/i386.c	(working copy)
@@ -35145,7 +35145,7 @@  make_resolver_func (const tree default_d
 
   gimplify_function_tree (decl);
   push_cfun (DECL_STRUCT_FUNCTION (decl));
-  *empty_bb = init_lowered_empty_function (decl, false);
+  *empty_bb = init_lowered_empty_function (decl, false, 0);
 
   cgraph_node::add_new_function (decl, true);
   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
Index: predict.c
===================================================================
--- predict.c	(revision 220229)
+++ predict.c	(working copy)
@@ -3040,7 +3040,8 @@  public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *) { return flag_guess_branch_prob; }
+  virtual bool gate (function *)
+    { return flag_guess_branch_prob && profile_status < PROFILE_GUESSED; }
   virtual unsigned int execute (function *);
 
 }; // class pass_profile