diff mbox

[GOOGLE] Patch to fix AutoFDO LIPO performance regression

Message ID CAO2gOZUmakeqvbY3dYQ=8W4KJ+8T=WbPcD9pQV82AnsXqmgJTw@mail.gmail.com
State New
Headers show

Commit Message

Dehao Chen Sept. 19, 2013, 5:10 p.m. UTC
Thanks, patch updated:

On Wed, Sep 18, 2013 at 5:16 PM, Xinliang David Li <davidxl@google.com> wrote:
> On Wed, Sep 18, 2013 at 4:51 PM, Dehao Chen <dehao@google.com> wrote:
>> This patch fixup the call graph edge targets during AutoFDO pass, so
>> that when rebuilding call graph edges, it can find the correct callee.
>>
>> Bootstrapped and passed regression test. Benchmark tests on-going.
>>
>> Ok for google-4_8 branch?
>>
>> Thanks,
>> Dehao
>>
>> Index: gcc/Makefile.in
>> ===================================================================
>> --- gcc/Makefile.in (revision 202725)
>> +++ gcc/Makefile.in (working copy)
>> @@ -2960,7 +2960,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
>>  auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
>>     $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
>>     $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
>> value-prof.h \
>> -   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) $(AUTO_PROFILE_H)
>> +   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) l-ipo.h $(AUTO_PROFILE_H)
>>  cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h
>> $(TM_H) $(RTL_H) \
>>     $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
>>     $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) \
>> Index: gcc/auto-profile.c
>> ===================================================================
>> --- gcc/auto-profile.c (revision 202725)
>> +++ gcc/auto-profile.c (working copy)
>> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "value-prof.h"
>>  #include "coverage.h"
>>  #include "params.h"
>> +#include "l-ipo.h"
>>  #include "auto-profile.h"
>>
>>  /* The following routines implements AutoFDO optimization.
>> @@ -1290,6 +1291,13 @@ auto_profile (void)
>>    init_node_map ();
>>    profile_info = autofdo::afdo_profile_info;
>>
>> +  cgraph_pre_profiling_inlining_done = true;
>> +  cgraph_process_module_scope_statics ();
>> +  /* Now perform link to allow cross module inlining.  */
>> +  cgraph_do_link ();
>> +  varpool_do_link ();
>> +  cgraph_unify_type_alias_sets ();
>> +
>>    FOR_EACH_FUNCTION (node)
>>      {
>>        if (!gimple_has_body_p (node->symbol.decl))
>> @@ -1301,6 +1309,21 @@ auto_profile (void)
>>
>>        push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
>>
>> +      if (L_IPO_COMP_MODE)
>> +        {
>> +  basic_block bb;
>> +  FOR_EACH_BB (bb)
>> +    {
>> +      gimple_stmt_iterator gsi;
>> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> + {
>> +  gimple stmt = gsi_stmt (gsi);
>> +  if (is_gimple_call (stmt))
>> +    lipo_fixup_cgraph_edge_call_target (stmt);
>> + }
>> +    }
>> + }
>> +
>
> Need this:
>
>
>       if (execute_fixup_cfg () & TODO_cleanup_cfg)
>           cleanup_tree_cfg ();
>
>
> as in tree-profiling. Changing call stmt targets can lead to CFG changes.
>
>
>
> David
>
>>        autofdo::afdo_annotate_cfg ();
>>        compute_function_frequency ();
>>        update_ssa (TODO_update_ssa);
>> @@ -1309,13 +1332,6 @@ auto_profile (void)
>>        pop_cfun ();
>>      }
>>
>> -  cgraph_pre_profiling_inlining_done = true;
>> -  cgraph_process_module_scope_statics ();
>> -  /* Now perform link to allow cross module inlining.  */
>> -  cgraph_do_link ();
>> -  varpool_do_link ();
>> -  cgraph_unify_type_alias_sets ();
>> -
>>    return TODO_rebuild_cgraph_edges;
>>  }

Comments

Xinliang David Li Sept. 19, 2013, 5:39 p.m. UTC | #1
ok.

David

On Thu, Sep 19, 2013 at 10:10 AM, Dehao Chen <dehao@google.com> wrote:
> Thanks, patch updated:
>
> Index: gcc/Makefile.in
> ===================================================================
> --- gcc/Makefile.in (revision 202725)
> +++ gcc/Makefile.in (working copy)
> @@ -2960,7 +2960,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
>  auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
>     $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
>     $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
> value-prof.h \
> -   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) $(AUTO_PROFILE_H)
> +   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) l-ipo.h $(AUTO_PROFILE_H)
>  cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h
> $(TM_H) $(RTL_H) \
>     $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
>     $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) \
> Index: gcc/auto-profile.c
> ===================================================================
> --- gcc/auto-profile.c (revision 202725)
> +++ gcc/auto-profile.c (working copy)
> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "value-prof.h"
>  #include "coverage.h"
>  #include "params.h"
> +#include "l-ipo.h"
>  #include "auto-profile.h"
>
>  /* The following routines implements AutoFDO optimization.
> @@ -1290,6 +1291,13 @@ auto_profile (void)
>    init_node_map ();
>    profile_info = autofdo::afdo_profile_info;
>
> +  cgraph_pre_profiling_inlining_done = true;
> +  cgraph_process_module_scope_statics ();
> +  /* Now perform link to allow cross module inlining.  */
> +  cgraph_do_link ();
> +  varpool_do_link ();
> +  cgraph_unify_type_alias_sets ();
> +
>    FOR_EACH_FUNCTION (node)
>      {
>        if (!gimple_has_body_p (node->symbol.decl))
> @@ -1301,21 +1309,33 @@ auto_profile (void)
>
>        push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
>
> +      if (L_IPO_COMP_MODE)
> +        {
> +  basic_block bb;
> +  FOR_EACH_BB (bb)
> +    {
> +      gimple_stmt_iterator gsi;
> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> + {
> +  gimple stmt = gsi_stmt (gsi);
> +  if (is_gimple_call (stmt))
> +    lipo_fixup_cgraph_edge_call_target (stmt);
> + }
> +    }
> + }
> +
>        autofdo::afdo_annotate_cfg ();
>        compute_function_frequency ();
>        update_ssa (TODO_update_ssa);
>
> +      /* Local pure-const may imply need to fixup the cfg.  */
> +      if (execute_fixup_cfg () & TODO_cleanup_cfg)
> + cleanup_tree_cfg ();
> +
>        current_function_decl = NULL;
>        pop_cfun ();
>      }
>
> -  cgraph_pre_profiling_inlining_done = true;
> -  cgraph_process_module_scope_statics ();
> -  /* Now perform link to allow cross module inlining.  */
> -  cgraph_do_link ();
> -  varpool_do_link ();
> -  cgraph_unify_type_alias_sets ();
> -
>    return TODO_rebuild_cgraph_edges;
>  }
>
> On Wed, Sep 18, 2013 at 5:16 PM, Xinliang David Li <davidxl@google.com> wrote:
>> On Wed, Sep 18, 2013 at 4:51 PM, Dehao Chen <dehao@google.com> wrote:
>>> This patch fixup the call graph edge targets during AutoFDO pass, so
>>> that when rebuilding call graph edges, it can find the correct callee.
>>>
>>> Bootstrapped and passed regression test. Benchmark tests on-going.
>>>
>>> Ok for google-4_8 branch?
>>>
>>> Thanks,
>>> Dehao
>>>
>>> Index: gcc/Makefile.in
>>> ===================================================================
>>> --- gcc/Makefile.in (revision 202725)
>>> +++ gcc/Makefile.in (working copy)
>>> @@ -2960,7 +2960,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
>>>  auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
>>>     $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
>>>     $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
>>> value-prof.h \
>>> -   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) $(AUTO_PROFILE_H)
>>> +   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) l-ipo.h $(AUTO_PROFILE_H)
>>>  cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h
>>> $(TM_H) $(RTL_H) \
>>>     $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
>>>     $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) \
>>> Index: gcc/auto-profile.c
>>> ===================================================================
>>> --- gcc/auto-profile.c (revision 202725)
>>> +++ gcc/auto-profile.c (working copy)
>>> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>>>  #include "value-prof.h"
>>>  #include "coverage.h"
>>>  #include "params.h"
>>> +#include "l-ipo.h"
>>>  #include "auto-profile.h"
>>>
>>>  /* The following routines implements AutoFDO optimization.
>>> @@ -1290,6 +1291,13 @@ auto_profile (void)
>>>    init_node_map ();
>>>    profile_info = autofdo::afdo_profile_info;
>>>
>>> +  cgraph_pre_profiling_inlining_done = true;
>>> +  cgraph_process_module_scope_statics ();
>>> +  /* Now perform link to allow cross module inlining.  */
>>> +  cgraph_do_link ();
>>> +  varpool_do_link ();
>>> +  cgraph_unify_type_alias_sets ();
>>> +
>>>    FOR_EACH_FUNCTION (node)
>>>      {
>>>        if (!gimple_has_body_p (node->symbol.decl))
>>> @@ -1301,6 +1309,21 @@ auto_profile (void)
>>>
>>>        push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
>>>
>>> +      if (L_IPO_COMP_MODE)
>>> +        {
>>> +  basic_block bb;
>>> +  FOR_EACH_BB (bb)
>>> +    {
>>> +      gimple_stmt_iterator gsi;
>>> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>> + {
>>> +  gimple stmt = gsi_stmt (gsi);
>>> +  if (is_gimple_call (stmt))
>>> +    lipo_fixup_cgraph_edge_call_target (stmt);
>>> + }
>>> +    }
>>> + }
>>> +
>>
>> Need this:
>>
>>
>>       if (execute_fixup_cfg () & TODO_cleanup_cfg)
>>           cleanup_tree_cfg ();
>>
>>
>> as in tree-profiling. Changing call stmt targets can lead to CFG changes.
>>
>>
>>
>> David
>>
>>>        autofdo::afdo_annotate_cfg ();
>>>        compute_function_frequency ();
>>>        update_ssa (TODO_update_ssa);
>>> @@ -1309,13 +1332,6 @@ auto_profile (void)
>>>        pop_cfun ();
>>>      }
>>>
>>> -  cgraph_pre_profiling_inlining_done = true;
>>> -  cgraph_process_module_scope_statics ();
>>> -  /* Now perform link to allow cross module inlining.  */
>>> -  cgraph_do_link ();
>>> -  varpool_do_link ();
>>> -  cgraph_unify_type_alias_sets ();
>>> -
>>>    return TODO_rebuild_cgraph_edges;
>>>  }
Xinliang David Li Sept. 20, 2013, 4:28 a.m. UTC | #2
I did not catch this in the last review. The cleanup CFG should be
done before afdo_annotate_cfg and just after call statement fixup.
Otherwise the cfg cleanup will zero out all profile counts. After
moving this up, you don't need the follow up patch which sets the
cgraph node's count -- that should be done in the
rebuild_cgraph_edges.

David

On Thu, Sep 19, 2013 at 10:10 AM, Dehao Chen <dehao@google.com> wrote:
> Thanks, patch updated:
>
> Index: gcc/Makefile.in
> ===================================================================
> --- gcc/Makefile.in (revision 202725)
> +++ gcc/Makefile.in (working copy)
> @@ -2960,7 +2960,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
>  auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
>     $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
>     $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
> value-prof.h \
> -   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) $(AUTO_PROFILE_H)
> +   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) l-ipo.h $(AUTO_PROFILE_H)
>  cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h
> $(TM_H) $(RTL_H) \
>     $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
>     $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) \
> Index: gcc/auto-profile.c
> ===================================================================
> --- gcc/auto-profile.c (revision 202725)
> +++ gcc/auto-profile.c (working copy)
> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "value-prof.h"
>  #include "coverage.h"
>  #include "params.h"
> +#include "l-ipo.h"
>  #include "auto-profile.h"
>
>  /* The following routines implements AutoFDO optimization.
> @@ -1290,6 +1291,13 @@ auto_profile (void)
>    init_node_map ();
>    profile_info = autofdo::afdo_profile_info;
>
> +  cgraph_pre_profiling_inlining_done = true;
> +  cgraph_process_module_scope_statics ();
> +  /* Now perform link to allow cross module inlining.  */
> +  cgraph_do_link ();
> +  varpool_do_link ();
> +  cgraph_unify_type_alias_sets ();
> +
>    FOR_EACH_FUNCTION (node)
>      {
>        if (!gimple_has_body_p (node->symbol.decl))
> @@ -1301,21 +1309,33 @@ auto_profile (void)
>
>        push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
>
> +      if (L_IPO_COMP_MODE)
> +        {
> +  basic_block bb;
> +  FOR_EACH_BB (bb)
> +    {
> +      gimple_stmt_iterator gsi;
> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> + {
> +  gimple stmt = gsi_stmt (gsi);
> +  if (is_gimple_call (stmt))
> +    lipo_fixup_cgraph_edge_call_target (stmt);
> + }
> +    }
> + }
> +
>        autofdo::afdo_annotate_cfg ();
>        compute_function_frequency ();
>        update_ssa (TODO_update_ssa);
>
> +      /* Local pure-const may imply need to fixup the cfg.  */
> +      if (execute_fixup_cfg () & TODO_cleanup_cfg)
> + cleanup_tree_cfg ();
> +
>        current_function_decl = NULL;
>        pop_cfun ();
>      }
>
> -  cgraph_pre_profiling_inlining_done = true;
> -  cgraph_process_module_scope_statics ();
> -  /* Now perform link to allow cross module inlining.  */
> -  cgraph_do_link ();
> -  varpool_do_link ();
> -  cgraph_unify_type_alias_sets ();
> -
>    return TODO_rebuild_cgraph_edges;
>  }
>
> On Wed, Sep 18, 2013 at 5:16 PM, Xinliang David Li <davidxl@google.com> wrote:
>> On Wed, Sep 18, 2013 at 4:51 PM, Dehao Chen <dehao@google.com> wrote:
>>> This patch fixup the call graph edge targets during AutoFDO pass, so
>>> that when rebuilding call graph edges, it can find the correct callee.
>>>
>>> Bootstrapped and passed regression test. Benchmark tests on-going.
>>>
>>> Ok for google-4_8 branch?
>>>
>>> Thanks,
>>> Dehao
>>>
>>> Index: gcc/Makefile.in
>>> ===================================================================
>>> --- gcc/Makefile.in (revision 202725)
>>> +++ gcc/Makefile.in (working copy)
>>> @@ -2960,7 +2960,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
>>>  auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
>>>     $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
>>>     $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
>>> value-prof.h \
>>> -   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) $(AUTO_PROFILE_H)
>>> +   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) l-ipo.h $(AUTO_PROFILE_H)
>>>  cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h
>>> $(TM_H) $(RTL_H) \
>>>     $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
>>>     $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) \
>>> Index: gcc/auto-profile.c
>>> ===================================================================
>>> --- gcc/auto-profile.c (revision 202725)
>>> +++ gcc/auto-profile.c (working copy)
>>> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>>>  #include "value-prof.h"
>>>  #include "coverage.h"
>>>  #include "params.h"
>>> +#include "l-ipo.h"
>>>  #include "auto-profile.h"
>>>
>>>  /* The following routines implements AutoFDO optimization.
>>> @@ -1290,6 +1291,13 @@ auto_profile (void)
>>>    init_node_map ();
>>>    profile_info = autofdo::afdo_profile_info;
>>>
>>> +  cgraph_pre_profiling_inlining_done = true;
>>> +  cgraph_process_module_scope_statics ();
>>> +  /* Now perform link to allow cross module inlining.  */
>>> +  cgraph_do_link ();
>>> +  varpool_do_link ();
>>> +  cgraph_unify_type_alias_sets ();
>>> +
>>>    FOR_EACH_FUNCTION (node)
>>>      {
>>>        if (!gimple_has_body_p (node->symbol.decl))
>>> @@ -1301,6 +1309,21 @@ auto_profile (void)
>>>
>>>        push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
>>>
>>> +      if (L_IPO_COMP_MODE)
>>> +        {
>>> +  basic_block bb;
>>> +  FOR_EACH_BB (bb)
>>> +    {
>>> +      gimple_stmt_iterator gsi;
>>> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>> + {
>>> +  gimple stmt = gsi_stmt (gsi);
>>> +  if (is_gimple_call (stmt))
>>> +    lipo_fixup_cgraph_edge_call_target (stmt);
>>> + }
>>> +    }
>>> + }
>>> +
>>
>> Need this:
>>
>>
>>       if (execute_fixup_cfg () & TODO_cleanup_cfg)
>>           cleanup_tree_cfg ();
>>
>>
>> as in tree-profiling. Changing call stmt targets can lead to CFG changes.
>>
>>
>>
>> David
>>
>>>        autofdo::afdo_annotate_cfg ();
>>>        compute_function_frequency ();
>>>        update_ssa (TODO_update_ssa);
>>> @@ -1309,13 +1332,6 @@ auto_profile (void)
>>>        pop_cfun ();
>>>      }
>>>
>>> -  cgraph_pre_profiling_inlining_done = true;
>>> -  cgraph_process_module_scope_statics ();
>>> -  /* Now perform link to allow cross module inlining.  */
>>> -  cgraph_do_link ();
>>> -  varpool_do_link ();
>>> -  cgraph_unify_type_alias_sets ();
>>> -
>>>    return TODO_rebuild_cgraph_edges;
>>>  }
Dehao Chen Sept. 20, 2013, 10:29 p.m. UTC | #3
Now that both statement fixup and cfg cleanup are moved after
annotation. So setting of cgraph node's count is still needed, right?

Thanks,
Dehao

On Thu, Sep 19, 2013 at 9:28 PM, Xinliang David Li <davidxl@google.com> wrote:
> I did not catch this in the last review. The cleanup CFG should be
> done before afdo_annotate_cfg and just after call statement fixup.
> Otherwise the cfg cleanup will zero out all profile counts. After
> moving this up, you don't need the follow up patch which sets the
> cgraph node's count -- that should be done in the
> rebuild_cgraph_edges.
>
> David
>
> On Thu, Sep 19, 2013 at 10:10 AM, Dehao Chen <dehao@google.com> wrote:
>> Thanks, patch updated:
>>
>> Index: gcc/Makefile.in
>> ===================================================================
>> --- gcc/Makefile.in (revision 202725)
>> +++ gcc/Makefile.in (working copy)
>> @@ -2960,7 +2960,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
>>  auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
>>     $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
>>     $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
>> value-prof.h \
>> -   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) $(AUTO_PROFILE_H)
>> +   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) l-ipo.h $(AUTO_PROFILE_H)
>>  cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h
>> $(TM_H) $(RTL_H) \
>>     $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
>>     $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) \
>> Index: gcc/auto-profile.c
>> ===================================================================
>> --- gcc/auto-profile.c (revision 202725)
>> +++ gcc/auto-profile.c (working copy)
>> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "value-prof.h"
>>  #include "coverage.h"
>>  #include "params.h"
>> +#include "l-ipo.h"
>>  #include "auto-profile.h"
>>
>>  /* The following routines implements AutoFDO optimization.
>> @@ -1290,6 +1291,13 @@ auto_profile (void)
>>    init_node_map ();
>>    profile_info = autofdo::afdo_profile_info;
>>
>> +  cgraph_pre_profiling_inlining_done = true;
>> +  cgraph_process_module_scope_statics ();
>> +  /* Now perform link to allow cross module inlining.  */
>> +  cgraph_do_link ();
>> +  varpool_do_link ();
>> +  cgraph_unify_type_alias_sets ();
>> +
>>    FOR_EACH_FUNCTION (node)
>>      {
>>        if (!gimple_has_body_p (node->symbol.decl))
>> @@ -1301,21 +1309,33 @@ auto_profile (void)
>>
>>        push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
>>
>> +      if (L_IPO_COMP_MODE)
>> +        {
>> +  basic_block bb;
>> +  FOR_EACH_BB (bb)
>> +    {
>> +      gimple_stmt_iterator gsi;
>> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> + {
>> +  gimple stmt = gsi_stmt (gsi);
>> +  if (is_gimple_call (stmt))
>> +    lipo_fixup_cgraph_edge_call_target (stmt);
>> + }
>> +    }
>> + }
>> +
>>        autofdo::afdo_annotate_cfg ();
>>        compute_function_frequency ();
>>        update_ssa (TODO_update_ssa);
>>
>> +      /* Local pure-const may imply need to fixup the cfg.  */
>> +      if (execute_fixup_cfg () & TODO_cleanup_cfg)
>> + cleanup_tree_cfg ();
>> +
>>        current_function_decl = NULL;
>>        pop_cfun ();
>>      }
>>
>> -  cgraph_pre_profiling_inlining_done = true;
>> -  cgraph_process_module_scope_statics ();
>> -  /* Now perform link to allow cross module inlining.  */
>> -  cgraph_do_link ();
>> -  varpool_do_link ();
>> -  cgraph_unify_type_alias_sets ();
>> -
>>    return TODO_rebuild_cgraph_edges;
>>  }
>>
>> On Wed, Sep 18, 2013 at 5:16 PM, Xinliang David Li <davidxl@google.com> wrote:
>>> On Wed, Sep 18, 2013 at 4:51 PM, Dehao Chen <dehao@google.com> wrote:
>>>> This patch fixup the call graph edge targets during AutoFDO pass, so
>>>> that when rebuilding call graph edges, it can find the correct callee.
>>>>
>>>> Bootstrapped and passed regression test. Benchmark tests on-going.
>>>>
>>>> Ok for google-4_8 branch?
>>>>
>>>> Thanks,
>>>> Dehao
>>>>
>>>> Index: gcc/Makefile.in
>>>> ===================================================================
>>>> --- gcc/Makefile.in (revision 202725)
>>>> +++ gcc/Makefile.in (working copy)
>>>> @@ -2960,7 +2960,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
>>>>  auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
>>>>     $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
>>>>     $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
>>>> value-prof.h \
>>>> -   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) $(AUTO_PROFILE_H)
>>>> +   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) l-ipo.h $(AUTO_PROFILE_H)
>>>>  cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h
>>>> $(TM_H) $(RTL_H) \
>>>>     $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
>>>>     $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) \
>>>> Index: gcc/auto-profile.c
>>>> ===================================================================
>>>> --- gcc/auto-profile.c (revision 202725)
>>>> +++ gcc/auto-profile.c (working copy)
>>>> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>>>>  #include "value-prof.h"
>>>>  #include "coverage.h"
>>>>  #include "params.h"
>>>> +#include "l-ipo.h"
>>>>  #include "auto-profile.h"
>>>>
>>>>  /* The following routines implements AutoFDO optimization.
>>>> @@ -1290,6 +1291,13 @@ auto_profile (void)
>>>>    init_node_map ();
>>>>    profile_info = autofdo::afdo_profile_info;
>>>>
>>>> +  cgraph_pre_profiling_inlining_done = true;
>>>> +  cgraph_process_module_scope_statics ();
>>>> +  /* Now perform link to allow cross module inlining.  */
>>>> +  cgraph_do_link ();
>>>> +  varpool_do_link ();
>>>> +  cgraph_unify_type_alias_sets ();
>>>> +
>>>>    FOR_EACH_FUNCTION (node)
>>>>      {
>>>>        if (!gimple_has_body_p (node->symbol.decl))
>>>> @@ -1301,6 +1309,21 @@ auto_profile (void)
>>>>
>>>>        push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
>>>>
>>>> +      if (L_IPO_COMP_MODE)
>>>> +        {
>>>> +  basic_block bb;
>>>> +  FOR_EACH_BB (bb)
>>>> +    {
>>>> +      gimple_stmt_iterator gsi;
>>>> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>> + {
>>>> +  gimple stmt = gsi_stmt (gsi);
>>>> +  if (is_gimple_call (stmt))
>>>> +    lipo_fixup_cgraph_edge_call_target (stmt);
>>>> + }
>>>> +    }
>>>> + }
>>>> +
>>>
>>> Need this:
>>>
>>>
>>>       if (execute_fixup_cfg () & TODO_cleanup_cfg)
>>>           cleanup_tree_cfg ();
>>>
>>>
>>> as in tree-profiling. Changing call stmt targets can lead to CFG changes.
>>>
>>>
>>>
>>> David
>>>
>>>>        autofdo::afdo_annotate_cfg ();
>>>>        compute_function_frequency ();
>>>>        update_ssa (TODO_update_ssa);
>>>> @@ -1309,13 +1332,6 @@ auto_profile (void)
>>>>        pop_cfun ();
>>>>      }
>>>>
>>>> -  cgraph_pre_profiling_inlining_done = true;
>>>> -  cgraph_process_module_scope_statics ();
>>>> -  /* Now perform link to allow cross module inlining.  */
>>>> -  cgraph_do_link ();
>>>> -  varpool_do_link ();
>>>> -  cgraph_unify_type_alias_sets ();
>>>> -
>>>>    return TODO_rebuild_cgraph_edges;
>>>>  }
Xinliang David Li Sept. 20, 2013, 10:39 p.m. UTC | #4
Yes -- in current form, it is still needed. As you explained, the
linking & stmt fixup will need to be done after profile annotation as
autofdo uses assembler name for icall target matching.

David

On Fri, Sep 20, 2013 at 3:29 PM, Dehao Chen <dehao@google.com> wrote:
> Now that both statement fixup and cfg cleanup are moved after
> annotation. So setting of cgraph node's count is still needed, right?
>
> Thanks,
> Dehao
>
> On Thu, Sep 19, 2013 at 9:28 PM, Xinliang David Li <davidxl@google.com> wrote:
>> I did not catch this in the last review. The cleanup CFG should be
>> done before afdo_annotate_cfg and just after call statement fixup.
>> Otherwise the cfg cleanup will zero out all profile counts. After
>> moving this up, you don't need the follow up patch which sets the
>> cgraph node's count -- that should be done in the
>> rebuild_cgraph_edges.
>>
>> David
>>
>> On Thu, Sep 19, 2013 at 10:10 AM, Dehao Chen <dehao@google.com> wrote:
>>> Thanks, patch updated:
>>>
>>> Index: gcc/Makefile.in
>>> ===================================================================
>>> --- gcc/Makefile.in (revision 202725)
>>> +++ gcc/Makefile.in (working copy)
>>> @@ -2960,7 +2960,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
>>>  auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
>>>     $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
>>>     $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
>>> value-prof.h \
>>> -   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) $(AUTO_PROFILE_H)
>>> +   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) l-ipo.h $(AUTO_PROFILE_H)
>>>  cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h
>>> $(TM_H) $(RTL_H) \
>>>     $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
>>>     $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) \
>>> Index: gcc/auto-profile.c
>>> ===================================================================
>>> --- gcc/auto-profile.c (revision 202725)
>>> +++ gcc/auto-profile.c (working copy)
>>> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>>>  #include "value-prof.h"
>>>  #include "coverage.h"
>>>  #include "params.h"
>>> +#include "l-ipo.h"
>>>  #include "auto-profile.h"
>>>
>>>  /* The following routines implements AutoFDO optimization.
>>> @@ -1290,6 +1291,13 @@ auto_profile (void)
>>>    init_node_map ();
>>>    profile_info = autofdo::afdo_profile_info;
>>>
>>> +  cgraph_pre_profiling_inlining_done = true;
>>> +  cgraph_process_module_scope_statics ();
>>> +  /* Now perform link to allow cross module inlining.  */
>>> +  cgraph_do_link ();
>>> +  varpool_do_link ();
>>> +  cgraph_unify_type_alias_sets ();
>>> +
>>>    FOR_EACH_FUNCTION (node)
>>>      {
>>>        if (!gimple_has_body_p (node->symbol.decl))
>>> @@ -1301,21 +1309,33 @@ auto_profile (void)
>>>
>>>        push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
>>>
>>> +      if (L_IPO_COMP_MODE)
>>> +        {
>>> +  basic_block bb;
>>> +  FOR_EACH_BB (bb)
>>> +    {
>>> +      gimple_stmt_iterator gsi;
>>> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>> + {
>>> +  gimple stmt = gsi_stmt (gsi);
>>> +  if (is_gimple_call (stmt))
>>> +    lipo_fixup_cgraph_edge_call_target (stmt);
>>> + }
>>> +    }
>>> + }
>>> +
>>>        autofdo::afdo_annotate_cfg ();
>>>        compute_function_frequency ();
>>>        update_ssa (TODO_update_ssa);
>>>
>>> +      /* Local pure-const may imply need to fixup the cfg.  */
>>> +      if (execute_fixup_cfg () & TODO_cleanup_cfg)
>>> + cleanup_tree_cfg ();
>>> +
>>>        current_function_decl = NULL;
>>>        pop_cfun ();
>>>      }
>>>
>>> -  cgraph_pre_profiling_inlining_done = true;
>>> -  cgraph_process_module_scope_statics ();
>>> -  /* Now perform link to allow cross module inlining.  */
>>> -  cgraph_do_link ();
>>> -  varpool_do_link ();
>>> -  cgraph_unify_type_alias_sets ();
>>> -
>>>    return TODO_rebuild_cgraph_edges;
>>>  }
>>>
>>> On Wed, Sep 18, 2013 at 5:16 PM, Xinliang David Li <davidxl@google.com> wrote:
>>>> On Wed, Sep 18, 2013 at 4:51 PM, Dehao Chen <dehao@google.com> wrote:
>>>>> This patch fixup the call graph edge targets during AutoFDO pass, so
>>>>> that when rebuilding call graph edges, it can find the correct callee.
>>>>>
>>>>> Bootstrapped and passed regression test. Benchmark tests on-going.
>>>>>
>>>>> Ok for google-4_8 branch?
>>>>>
>>>>> Thanks,
>>>>> Dehao
>>>>>
>>>>> Index: gcc/Makefile.in
>>>>> ===================================================================
>>>>> --- gcc/Makefile.in (revision 202725)
>>>>> +++ gcc/Makefile.in (working copy)
>>>>> @@ -2960,7 +2960,7 @@ coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
>>>>>  auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
>>>>>     $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
>>>>>     $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
>>>>> value-prof.h \
>>>>> -   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) $(AUTO_PROFILE_H)
>>>>> +   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) l-ipo.h $(AUTO_PROFILE_H)
>>>>>  cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h
>>>>> $(TM_H) $(RTL_H) \
>>>>>     $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
>>>>>     $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) \
>>>>> Index: gcc/auto-profile.c
>>>>> ===================================================================
>>>>> --- gcc/auto-profile.c (revision 202725)
>>>>> +++ gcc/auto-profile.c (working copy)
>>>>> @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
>>>>>  #include "value-prof.h"
>>>>>  #include "coverage.h"
>>>>>  #include "params.h"
>>>>> +#include "l-ipo.h"
>>>>>  #include "auto-profile.h"
>>>>>
>>>>>  /* The following routines implements AutoFDO optimization.
>>>>> @@ -1290,6 +1291,13 @@ auto_profile (void)
>>>>>    init_node_map ();
>>>>>    profile_info = autofdo::afdo_profile_info;
>>>>>
>>>>> +  cgraph_pre_profiling_inlining_done = true;
>>>>> +  cgraph_process_module_scope_statics ();
>>>>> +  /* Now perform link to allow cross module inlining.  */
>>>>> +  cgraph_do_link ();
>>>>> +  varpool_do_link ();
>>>>> +  cgraph_unify_type_alias_sets ();
>>>>> +
>>>>>    FOR_EACH_FUNCTION (node)
>>>>>      {
>>>>>        if (!gimple_has_body_p (node->symbol.decl))
>>>>> @@ -1301,6 +1309,21 @@ auto_profile (void)
>>>>>
>>>>>        push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
>>>>>
>>>>> +      if (L_IPO_COMP_MODE)
>>>>> +        {
>>>>> +  basic_block bb;
>>>>> +  FOR_EACH_BB (bb)
>>>>> +    {
>>>>> +      gimple_stmt_iterator gsi;
>>>>> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>> + {
>>>>> +  gimple stmt = gsi_stmt (gsi);
>>>>> +  if (is_gimple_call (stmt))
>>>>> +    lipo_fixup_cgraph_edge_call_target (stmt);
>>>>> + }
>>>>> +    }
>>>>> + }
>>>>> +
>>>>
>>>> Need this:
>>>>
>>>>
>>>>       if (execute_fixup_cfg () & TODO_cleanup_cfg)
>>>>           cleanup_tree_cfg ();
>>>>
>>>>
>>>> as in tree-profiling. Changing call stmt targets can lead to CFG changes.
>>>>
>>>>
>>>>
>>>> David
>>>>
>>>>>        autofdo::afdo_annotate_cfg ();
>>>>>        compute_function_frequency ();
>>>>>        update_ssa (TODO_update_ssa);
>>>>> @@ -1309,13 +1332,6 @@ auto_profile (void)
>>>>>        pop_cfun ();
>>>>>      }
>>>>>
>>>>> -  cgraph_pre_profiling_inlining_done = true;
>>>>> -  cgraph_process_module_scope_statics ();
>>>>> -  /* Now perform link to allow cross module inlining.  */
>>>>> -  cgraph_do_link ();
>>>>> -  varpool_do_link ();
>>>>> -  cgraph_unify_type_alias_sets ();
>>>>> -
>>>>>    return TODO_rebuild_cgraph_edges;
>>>>>  }
diff mbox

Patch

Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in (revision 202725)
+++ gcc/Makefile.in (working copy)
@@ -2960,7 +2960,7 @@  coverage.o : coverage.c $(GCOV_IO_H) $(CONFIG_H) $
 auto-profile.o : auto-profile.c $(CONFIG_H) $(SYSTEM_H) $(FLAGS_H) \
    $(BASIC_BLOCK_H) $(DIAGNOSTIC_CORE_H) $(GCOV_IO_H) $(INPUT_H) profile.h \
    $(LANGHOOKS_H) $(OPTS_H) $(TREE_PASS_H) $(CGRAPH_H) $(GIMPLE_H)
value-prof.h \
-   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) $(AUTO_PROFILE_H)
+   $(COVERAGE_H) coretypes.h $(TREE_H) $(PARAMS_H) l-ipo.h $(AUTO_PROFILE_H)
 cselib.o : cselib.c $(CONFIG_H) $(SYSTEM_H) coretypes.h dumpfile.h
$(TM_H) $(RTL_H) \
    $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
    $(EMIT_RTL_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) \
Index: gcc/auto-profile.c
===================================================================
--- gcc/auto-profile.c (revision 202725)
+++ gcc/auto-profile.c (working copy)
@@ -46,6 +46,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "value-prof.h"
 #include "coverage.h"
 #include "params.h"
+#include "l-ipo.h"
 #include "auto-profile.h"

 /* The following routines implements AutoFDO optimization.
@@ -1290,6 +1291,13 @@  auto_profile (void)
   init_node_map ();
   profile_info = autofdo::afdo_profile_info;

+  cgraph_pre_profiling_inlining_done = true;
+  cgraph_process_module_scope_statics ();
+  /* Now perform link to allow cross module inlining.  */
+  cgraph_do_link ();
+  varpool_do_link ();
+  cgraph_unify_type_alias_sets ();
+
   FOR_EACH_FUNCTION (node)
     {
       if (!gimple_has_body_p (node->symbol.decl))
@@ -1301,21 +1309,33 @@  auto_profile (void)

       push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));

+      if (L_IPO_COMP_MODE)
+        {
+  basic_block bb;
+  FOR_EACH_BB (bb)
+    {
+      gimple_stmt_iterator gsi;
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+  gimple stmt = gsi_stmt (gsi);
+  if (is_gimple_call (stmt))
+    lipo_fixup_cgraph_edge_call_target (stmt);
+ }
+    }
+ }
+
       autofdo::afdo_annotate_cfg ();
       compute_function_frequency ();
       update_ssa (TODO_update_ssa);

+      /* Local pure-const may imply need to fixup the cfg.  */
+      if (execute_fixup_cfg () & TODO_cleanup_cfg)
+ cleanup_tree_cfg ();
+
       current_function_decl = NULL;
       pop_cfun ();
     }

-  cgraph_pre_profiling_inlining_done = true;
-  cgraph_process_module_scope_statics ();
-  /* Now perform link to allow cross module inlining.  */
-  cgraph_do_link ();
-  varpool_do_link ();
-  cgraph_unify_type_alias_sets ();
-
   return TODO_rebuild_cgraph_edges;
 }