diff mbox

Preserve callee cgraph edges when callgraph profiles sections are requested. (issue5101042)

Message ID 20110922230105.AF352B21BB@azwildcat.mtv.corp.google.com
State New
Headers show

Commit Message

Sriraman Tallam Sept. 22, 2011, 11:01 p.m. UTC
This patch preserves cgraph callee edges till pass_final if callgraph
edge profiles sections are requested.  It also renames callgraph edge
profile sections to be .gnu.callgraph instead of .note.callgraph



--
This patch is available for review at http://codereview.appspot.com/5101042

Comments

Sriraman Tallam Sept. 22, 2011, 11:11 p.m. UTC | #1
Forgot to mention the changes:

	* cgraphbuild.c (remove_cgraph_callee_edges): Preserve callee edges if
	callgraph profiles are needed.
	* final.c (rest_of_handle_final): Rename .note.callgraph sections as
	.gnu.callgraph sections.
	* tree-optimize.c (gate_all_optimizations): Set
	cgraph_callee_edges_final_cleanup to true.
	(cgraph_callee_edges_final_cleanup): New global variable.

	* g++.dg/tree-prof/callgraph-profiles.C: New test.

On Thu, Sep 22, 2011 at 4:01 PM, Sriraman Tallam <tmsriram@google.com> wrote:
> This patch preserves cgraph callee edges till pass_final if callgraph
> edge profiles sections are requested.  It also renames callgraph edge
> profile sections to be .gnu.callgraph instead of .note.callgraph
>
>
> Index: cgraphbuild.c
> ===================================================================
> --- cgraphbuild.c       (revision 179098)
> +++ cgraphbuild.c       (working copy)
> @@ -697,10 +697,18 @@ struct gimple_opt_pass pass_rebuild_cgraph_edges =
>  }
>  };
>
> +/* Defined in tree-optimize.c  */
> +extern bool cgraph_callee_edges_final_cleanup;
>
>  static unsigned int
>  remove_cgraph_callee_edges (void)
>  {
> +  /* The -fcallgraph-profiles-sections flag needs the call-graph preserved
> +     till pass_final.  */
> +  if (cgraph_callee_edges_final_cleanup
> +      && flag_callgraph_profiles_sections)
> +      return 0;
> +
>   cgraph_node_remove_callees (cgraph_node (current_function_decl));
>   return 0;
>  }
> Index: final.c
> ===================================================================
> --- final.c     (revision 179098)
> +++ final.c     (working copy)
> @@ -4425,10 +4425,11 @@ rest_of_handle_final (void)
>      profiling information. */
>   if (flag_callgraph_profiles_sections
>       && flag_profile_use
> -      && cgraph_node (current_function_decl) != NULL)
> +      && cgraph_node (current_function_decl) != NULL
> +      && (cgraph_node (current_function_decl))->callees != NULL)
>     {
>       flags = SECTION_DEBUG;
> -      asprintf (&profile_fnname, ".note.callgraph.text.%s", fnname);
> +      asprintf (&profile_fnname, ".gnu.callgraph.text.%s", fnname);
>       switch_to_section (get_section (profile_fnname, flags, NULL));
>       fprintf (asm_out_file, "\t.string \"Function %s\"\n", fnname);
>       dump_cgraph_profiles ();
> Index: testsuite/g++.dg/tree-prof/callgraph-profiles.C
> ===================================================================
> --- testsuite/g++.dg/tree-prof/callgraph-profiles.C     (revision 0)
> +++ testsuite/g++.dg/tree-prof/callgraph-profiles.C     (revision 0)
> @@ -0,0 +1,29 @@
> +/* Verify if call-graph profile sections are created
> +   with -fcallgraph-profiles-sections. */
> +/* { dg-options "-O2 -fcallgraph-profiles-sections -ffunction-sections --save-temps" } */
> +
> +int __attribute__ ((noinline))
> +foo ()
> +{
> +  return 1;
> +}
> +
> +int __attribute__ ((noinline))
> +bar ()
> +{
> +  return 0;
> +}
> +
> +int main ()
> +{
> +  int sum;
> +  for (int i = 0; i< 1000; i++)
> +    {
> +      sum = foo () + bar();
> +    }
> +  return sum * bar ();
> +}
> +
> +/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */
> +/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */
> +/* { dg-final-use { cleanup-saved-temps } }  */
> Index: tree-optimize.c
> ===================================================================
> --- tree-optimize.c     (revision 179098)
> +++ tree-optimize.c     (working copy)
> @@ -48,11 +48,17 @@ along with GCC; see the file COPYING3.  If not see
>  #include "plugin.h"
>  #include "regset.h"    /* FIXME: For reg_obstack.  */
>
> +/* Decides if the cgraph callee edges are being cleaned up for the
> +   last time.  */
> +bool cgraph_callee_edges_final_cleanup = false;
> +
>  /* Gate: execute, or not, all of the non-trivial optimizations.  */
>
>  static bool
>  gate_all_optimizations (void)
>  {
> +  /* The cgraph callee edges can be cleaned up for the last time.  */
> +  cgraph_callee_edges_final_cleanup = true;
>   return (optimize >= 1
>          /* Don't bother doing anything if the program has errors.
>             We have to pass down the queue if we already went into SSA */
>
> --
> This patch is available for review at http://codereview.appspot.com/5101042
>
Xinliang David Li Sept. 22, 2011, 11:24 p.m. UTC | #2
ok for google branches.

(Did a little digging -- the remove pass is added because ipa-inline
did not do a good job updating the call graph so there might be some
inconsistency. However the affinity information needs to be computed
after inline transformation, so some more work may be needed in the
future to fix those inconsistencies -- or perhaps rebuild cgraph edges
if profile-cgraph-section option is specified).

David

On Thu, Sep 22, 2011 at 4:01 PM, Sriraman Tallam <tmsriram@google.com> wrote:
> This patch preserves cgraph callee edges till pass_final if callgraph
> edge profiles sections are requested.  It also renames callgraph edge
> profile sections to be .gnu.callgraph instead of .note.callgraph
>
>
> Index: cgraphbuild.c
> ===================================================================
> --- cgraphbuild.c       (revision 179098)
> +++ cgraphbuild.c       (working copy)
> @@ -697,10 +697,18 @@ struct gimple_opt_pass pass_rebuild_cgraph_edges =
>  }
>  };
>
> +/* Defined in tree-optimize.c  */
> +extern bool cgraph_callee_edges_final_cleanup;
>
>  static unsigned int
>  remove_cgraph_callee_edges (void)
>  {
> +  /* The -fcallgraph-profiles-sections flag needs the call-graph preserved
> +     till pass_final.  */
> +  if (cgraph_callee_edges_final_cleanup
> +      && flag_callgraph_profiles_sections)
> +      return 0;
> +
>   cgraph_node_remove_callees (cgraph_node (current_function_decl));
>   return 0;
>  }
> Index: final.c
> ===================================================================
> --- final.c     (revision 179098)
> +++ final.c     (working copy)
> @@ -4425,10 +4425,11 @@ rest_of_handle_final (void)
>      profiling information. */
>   if (flag_callgraph_profiles_sections
>       && flag_profile_use
> -      && cgraph_node (current_function_decl) != NULL)
> +      && cgraph_node (current_function_decl) != NULL
> +      && (cgraph_node (current_function_decl))->callees != NULL)
>     {
>       flags = SECTION_DEBUG;
> -      asprintf (&profile_fnname, ".note.callgraph.text.%s", fnname);
> +      asprintf (&profile_fnname, ".gnu.callgraph.text.%s", fnname);
>       switch_to_section (get_section (profile_fnname, flags, NULL));
>       fprintf (asm_out_file, "\t.string \"Function %s\"\n", fnname);
>       dump_cgraph_profiles ();
> Index: testsuite/g++.dg/tree-prof/callgraph-profiles.C
> ===================================================================
> --- testsuite/g++.dg/tree-prof/callgraph-profiles.C     (revision 0)
> +++ testsuite/g++.dg/tree-prof/callgraph-profiles.C     (revision 0)
> @@ -0,0 +1,29 @@
> +/* Verify if call-graph profile sections are created
> +   with -fcallgraph-profiles-sections. */
> +/* { dg-options "-O2 -fcallgraph-profiles-sections -ffunction-sections --save-temps" } */
> +
> +int __attribute__ ((noinline))
> +foo ()
> +{
> +  return 1;
> +}
> +
> +int __attribute__ ((noinline))
> +bar ()
> +{
> +  return 0;
> +}
> +
> +int main ()
> +{
> +  int sum;
> +  for (int i = 0; i< 1000; i++)
> +    {
> +      sum = foo () + bar();
> +    }
> +  return sum * bar ();
> +}
> +
> +/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */
> +/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */
> +/* { dg-final-use { cleanup-saved-temps } }  */
> Index: tree-optimize.c
> ===================================================================
> --- tree-optimize.c     (revision 179098)
> +++ tree-optimize.c     (working copy)
> @@ -48,11 +48,17 @@ along with GCC; see the file COPYING3.  If not see
>  #include "plugin.h"
>  #include "regset.h"    /* FIXME: For reg_obstack.  */
>
> +/* Decides if the cgraph callee edges are being cleaned up for the
> +   last time.  */
> +bool cgraph_callee_edges_final_cleanup = false;
> +
>  /* Gate: execute, or not, all of the non-trivial optimizations.  */
>
>  static bool
>  gate_all_optimizations (void)
>  {
> +  /* The cgraph callee edges can be cleaned up for the last time.  */
> +  cgraph_callee_edges_final_cleanup = true;
>   return (optimize >= 1
>          /* Don't bother doing anything if the program has errors.
>             We have to pass down the queue if we already went into SSA */
>
> --
> This patch is available for review at http://codereview.appspot.com/5101042
>
Martin Jambor Sept. 23, 2011, 2:49 p.m. UTC | #3
Hi,

On Thu, Sep 22, 2011 at 04:24:47PM -0700, Xinliang David Li wrote:
> ok for google branches.
> 
> (Did a little digging -- the remove pass is added because ipa-inline
> did not do a good job updating the call graph so there might be some
> inconsistency. However the affinity information needs to be computed
> after inline transformation, so some more work may be needed in the
> future to fix those inconsistencies -- or perhaps rebuild cgraph edges
> if profile-cgraph-section option is specified).

the main reason why we remove all call graph edges at a few
compilation points is that we do not keep them up to date when
manipulating GIMPLE.  That for example means that when DCE eliminates
a call statement, neither it nor the gimple infrastructure attempts to
locate the associated call graph edge and remove it.  Similarly, when
some form of propagation discovers a target of a call statement, it
updates the statement but that does not bring the associated call
graph edge (if it exists) from the indirect ones to the direct.

Therefore, if call graph edges were kept, they could increasingly get
out of sync with the gimple code which naturally might lead to all
sorts of surprises.  The solution has so far been to re-calculate all
the cgraph edges before they are needed and throw them away before
doing intraprocedural optimization (mainly to avoid confusion).

Inlining and all other IPA passes in fact update call graph edges
meticulously because that is all they have in LTO.  The problem IIRC
was that inlining was also supposed to remove the edges for the
reasons described above but was not doing it consistently.  So it was
moved to a different simple pass.

Removing the edges allowed a quick initial implementation of the call
graph and has certainly been good enough for a long time but recently
people increasingly bump into limitations it poses so we might
consider making them permanent now (or soon).  That is also what you
may want because what you have with the patch is the state of the
edges after inlining (which might be good enough, I admit I don't
know).

HTH,

Martin

> 
> David
> 
> On Thu, Sep 22, 2011 at 4:01 PM, Sriraman Tallam <tmsriram@google.com> wrote:
> > This patch preserves cgraph callee edges till pass_final if callgraph
> > edge profiles sections are requested.  It also renames callgraph edge
> > profile sections to be .gnu.callgraph instead of .note.callgraph
> >
> >
> > Index: cgraphbuild.c
> > ===================================================================
> > --- cgraphbuild.c       (revision 179098)
> > +++ cgraphbuild.c       (working copy)
> > @@ -697,10 +697,18 @@ struct gimple_opt_pass pass_rebuild_cgraph_edges =
> >  }
> >  };
> >
> > +/* Defined in tree-optimize.c  */
> > +extern bool cgraph_callee_edges_final_cleanup;
> >
> >  static unsigned int
> >  remove_cgraph_callee_edges (void)
> >  {
> > +  /* The -fcallgraph-profiles-sections flag needs the call-graph preserved
> > +     till pass_final.  */
> > +  if (cgraph_callee_edges_final_cleanup
> > +      && flag_callgraph_profiles_sections)
> > +      return 0;
> > +
> >   cgraph_node_remove_callees (cgraph_node (current_function_decl));
> >   return 0;
> >  }
> > Index: final.c
> > ===================================================================
> > --- final.c     (revision 179098)
> > +++ final.c     (working copy)
> > @@ -4425,10 +4425,11 @@ rest_of_handle_final (void)
> >      profiling information. */
> >   if (flag_callgraph_profiles_sections
> >       && flag_profile_use
> > -      && cgraph_node (current_function_decl) != NULL)
> > +      && cgraph_node (current_function_decl) != NULL
> > +      && (cgraph_node (current_function_decl))->callees != NULL)
> >     {
> >       flags = SECTION_DEBUG;
> > -      asprintf (&profile_fnname, ".note.callgraph.text.%s", fnname);
> > +      asprintf (&profile_fnname, ".gnu.callgraph.text.%s", fnname);
> >       switch_to_section (get_section (profile_fnname, flags, NULL));
> >       fprintf (asm_out_file, "\t.string \"Function %s\"\n", fnname);
> >       dump_cgraph_profiles ();
> > Index: testsuite/g++.dg/tree-prof/callgraph-profiles.C
> > ===================================================================
> > --- testsuite/g++.dg/tree-prof/callgraph-profiles.C     (revision 0)
> > +++ testsuite/g++.dg/tree-prof/callgraph-profiles.C     (revision 0)
> > @@ -0,0 +1,29 @@
> > +/* Verify if call-graph profile sections are created
> > +   with -fcallgraph-profiles-sections. */
> > +/* { dg-options "-O2 -fcallgraph-profiles-sections -ffunction-sections --save-temps" } */
> > +
> > +int __attribute__ ((noinline))
> > +foo ()
> > +{
> > +  return 1;
> > +}
> > +
> > +int __attribute__ ((noinline))
> > +bar ()
> > +{
> > +  return 0;
> > +}
> > +
> > +int main ()
> > +{
> > +  int sum;
> > +  for (int i = 0; i< 1000; i++)
> > +    {
> > +      sum = foo () + bar();
> > +    }
> > +  return sum * bar ();
> > +}
> > +
> > +/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */
> > +/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */
> > +/* { dg-final-use { cleanup-saved-temps } }  */
> > Index: tree-optimize.c
> > ===================================================================
> > --- tree-optimize.c     (revision 179098)
> > +++ tree-optimize.c     (working copy)
> > @@ -48,11 +48,17 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "plugin.h"
> >  #include "regset.h"    /* FIXME: For reg_obstack.  */
> >
> > +/* Decides if the cgraph callee edges are being cleaned up for the
> > +   last time.  */
> > +bool cgraph_callee_edges_final_cleanup = false;
> > +
> >  /* Gate: execute, or not, all of the non-trivial optimizations.  */
> >
> >  static bool
> >  gate_all_optimizations (void)
> >  {
> > +  /* The cgraph callee edges can be cleaned up for the last time.  */
> > +  cgraph_callee_edges_final_cleanup = true;
> >   return (optimize >= 1
> >          /* Don't bother doing anything if the program has errors.
> >             We have to pass down the queue if we already went into SSA */
> >
> > --
> > This patch is available for review at http://codereview.appspot.com/5101042
> >
Xinliang David Li Sept. 23, 2011, 4:12 p.m. UTC | #4
Martin,

thanks for the explanation.

I knew inliner did a good job of maintaining callgraph and profile
information, but did not know about removing dead edges etc.

However the situation you described (unwanted edges, post-ipa DCE
eliminating calls without updating cg etc) won't be a big issue for
Sri's use case as the edges that needed to be written to the note
section are those with non-zero count -- which means they are 'live'
and won't be eliminated.

However, making cg permanent would be even better :) -- there are
definitely other use cases (post-ipa) of it in the future.

thanks,

David


On Fri, Sep 23, 2011 at 7:49 AM, Martin Jambor <mjambor@suse.cz> wrote:
> Hi,
>
> On Thu, Sep 22, 2011 at 04:24:47PM -0700, Xinliang David Li wrote:
>> ok for google branches.
>>
>> (Did a little digging -- the remove pass is added because ipa-inline
>> did not do a good job updating the call graph so there might be some
>> inconsistency. However the affinity information needs to be computed
>> after inline transformation, so some more work may be needed in the
>> future to fix those inconsistencies -- or perhaps rebuild cgraph edges
>> if profile-cgraph-section option is specified).
>
> the main reason why we remove all call graph edges at a few
> compilation points is that we do not keep them up to date when
> manipulating GIMPLE.  That for example means that when DCE eliminates
> a call statement, neither it nor the gimple infrastructure attempts to
> locate the associated call graph edge and remove it.  Similarly, when
> some form of propagation discovers a target of a call statement, it
> updates the statement but that does not bring the associated call
> graph edge (if it exists) from the indirect ones to the direct.
>
> Therefore, if call graph edges were kept, they could increasingly get
> out of sync with the gimple code which naturally might lead to all
> sorts of surprises.  The solution has so far been to re-calculate all
> the cgraph edges before they are needed and throw them away before
> doing intraprocedural optimization (mainly to avoid confusion).
>
> Inlining and all other IPA passes in fact update call graph edges
> meticulously because that is all they have in LTO.  The problem IIRC
> was that inlining was also supposed to remove the edges for the
> reasons described above but was not doing it consistently.  So it was
> moved to a different simple pass.
>
> Removing the edges allowed a quick initial implementation of the call
> graph and has certainly been good enough for a long time but recently
> people increasingly bump into limitations it poses so we might
> consider making them permanent now (or soon).  That is also what you
> may want because what you have with the patch is the state of the
> edges after inlining (which might be good enough, I admit I don't
> know).
>
> HTH,
>
> Martin
>
>>
>> David
>>
>> On Thu, Sep 22, 2011 at 4:01 PM, Sriraman Tallam <tmsriram@google.com> wrote:
>> > This patch preserves cgraph callee edges till pass_final if callgraph
>> > edge profiles sections are requested.  It also renames callgraph edge
>> > profile sections to be .gnu.callgraph instead of .note.callgraph
>> >
>> >
>> > Index: cgraphbuild.c
>> > ===================================================================
>> > --- cgraphbuild.c       (revision 179098)
>> > +++ cgraphbuild.c       (working copy)
>> > @@ -697,10 +697,18 @@ struct gimple_opt_pass pass_rebuild_cgraph_edges =
>> >  }
>> >  };
>> >
>> > +/* Defined in tree-optimize.c  */
>> > +extern bool cgraph_callee_edges_final_cleanup;
>> >
>> >  static unsigned int
>> >  remove_cgraph_callee_edges (void)
>> >  {
>> > +  /* The -fcallgraph-profiles-sections flag needs the call-graph preserved
>> > +     till pass_final.  */
>> > +  if (cgraph_callee_edges_final_cleanup
>> > +      && flag_callgraph_profiles_sections)
>> > +      return 0;
>> > +
>> >   cgraph_node_remove_callees (cgraph_node (current_function_decl));
>> >   return 0;
>> >  }
>> > Index: final.c
>> > ===================================================================
>> > --- final.c     (revision 179098)
>> > +++ final.c     (working copy)
>> > @@ -4425,10 +4425,11 @@ rest_of_handle_final (void)
>> >      profiling information. */
>> >   if (flag_callgraph_profiles_sections
>> >       && flag_profile_use
>> > -      && cgraph_node (current_function_decl) != NULL)
>> > +      && cgraph_node (current_function_decl) != NULL
>> > +      && (cgraph_node (current_function_decl))->callees != NULL)
>> >     {
>> >       flags = SECTION_DEBUG;
>> > -      asprintf (&profile_fnname, ".note.callgraph.text.%s", fnname);
>> > +      asprintf (&profile_fnname, ".gnu.callgraph.text.%s", fnname);
>> >       switch_to_section (get_section (profile_fnname, flags, NULL));
>> >       fprintf (asm_out_file, "\t.string \"Function %s\"\n", fnname);
>> >       dump_cgraph_profiles ();
>> > Index: testsuite/g++.dg/tree-prof/callgraph-profiles.C
>> > ===================================================================
>> > --- testsuite/g++.dg/tree-prof/callgraph-profiles.C     (revision 0)
>> > +++ testsuite/g++.dg/tree-prof/callgraph-profiles.C     (revision 0)
>> > @@ -0,0 +1,29 @@
>> > +/* Verify if call-graph profile sections are created
>> > +   with -fcallgraph-profiles-sections. */
>> > +/* { dg-options "-O2 -fcallgraph-profiles-sections -ffunction-sections --save-temps" } */
>> > +
>> > +int __attribute__ ((noinline))
>> > +foo ()
>> > +{
>> > +  return 1;
>> > +}
>> > +
>> > +int __attribute__ ((noinline))
>> > +bar ()
>> > +{
>> > +  return 0;
>> > +}
>> > +
>> > +int main ()
>> > +{
>> > +  int sum;
>> > +  for (int i = 0; i< 1000; i++)
>> > +    {
>> > +      sum = foo () + bar();
>> > +    }
>> > +  return sum * bar ();
>> > +}
>> > +
>> > +/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */
>> > +/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */
>> > +/* { dg-final-use { cleanup-saved-temps } }  */
>> > Index: tree-optimize.c
>> > ===================================================================
>> > --- tree-optimize.c     (revision 179098)
>> > +++ tree-optimize.c     (working copy)
>> > @@ -48,11 +48,17 @@ along with GCC; see the file COPYING3.  If not see
>> >  #include "plugin.h"
>> >  #include "regset.h"    /* FIXME: For reg_obstack.  */
>> >
>> > +/* Decides if the cgraph callee edges are being cleaned up for the
>> > +   last time.  */
>> > +bool cgraph_callee_edges_final_cleanup = false;
>> > +
>> >  /* Gate: execute, or not, all of the non-trivial optimizations.  */
>> >
>> >  static bool
>> >  gate_all_optimizations (void)
>> >  {
>> > +  /* The cgraph callee edges can be cleaned up for the last time.  */
>> > +  cgraph_callee_edges_final_cleanup = true;
>> >   return (optimize >= 1
>> >          /* Don't bother doing anything if the program has errors.
>> >             We have to pass down the queue if we already went into SSA */
>> >
>> > --
>> > This patch is available for review at http://codereview.appspot.com/5101042
>> >
>
Jan Hubicka Sept. 23, 2011, 5:28 p.m. UTC | #5
Hi,

> Martin,
> 
> thanks for the explanation.
> 
> I knew inliner did a good job of maintaining callgraph and profile
> information, but did not know about removing dead edges etc.

This was fixed with introduction of virtual clones, where the edges are still
needed post inlining.  We however do not maintain them longer.
It should not be that hard to do so - we already maintain EH tables on side
of the statements themselves, so one would just hook at the same places as EH
updating is done.
So far it was not neccesary.
> 
> However the situation you described (unwanted edges, post-ipa DCE
> eliminating calls without updating cg etc) won't be a big issue for
> Sri's use case as the edges that needed to be written to the note
> section are those with non-zero count -- which means they are 'live'
> and won't be eliminated.
> 
> However, making cg permanent would be even better :) -- there are
> definitely other use cases (post-ipa) of it in the future.

I would not be opposed to that, it just need some work ;)

Honza
diff mbox

Patch

Index: cgraphbuild.c
===================================================================
--- cgraphbuild.c	(revision 179098)
+++ cgraphbuild.c	(working copy)
@@ -697,10 +697,18 @@  struct gimple_opt_pass pass_rebuild_cgraph_edges =
  }
 };
 
+/* Defined in tree-optimize.c  */
+extern bool cgraph_callee_edges_final_cleanup; 
 
 static unsigned int
 remove_cgraph_callee_edges (void)
 {
+  /* The -fcallgraph-profiles-sections flag needs the call-graph preserved
+     till pass_final.  */
+  if (cgraph_callee_edges_final_cleanup
+      && flag_callgraph_profiles_sections)
+      return 0;
+
   cgraph_node_remove_callees (cgraph_node (current_function_decl));
   return 0;
 }
Index: final.c
===================================================================
--- final.c	(revision 179098)
+++ final.c	(working copy)
@@ -4425,10 +4425,11 @@  rest_of_handle_final (void)
      profiling information. */
   if (flag_callgraph_profiles_sections
       && flag_profile_use
-      && cgraph_node (current_function_decl) != NULL)
+      && cgraph_node (current_function_decl) != NULL
+      && (cgraph_node (current_function_decl))->callees != NULL)
     {
       flags = SECTION_DEBUG;
-      asprintf (&profile_fnname, ".note.callgraph.text.%s", fnname);
+      asprintf (&profile_fnname, ".gnu.callgraph.text.%s", fnname);
       switch_to_section (get_section (profile_fnname, flags, NULL));
       fprintf (asm_out_file, "\t.string \"Function %s\"\n", fnname);
       dump_cgraph_profiles ();
Index: testsuite/g++.dg/tree-prof/callgraph-profiles.C
===================================================================
--- testsuite/g++.dg/tree-prof/callgraph-profiles.C	(revision 0)
+++ testsuite/g++.dg/tree-prof/callgraph-profiles.C	(revision 0)
@@ -0,0 +1,29 @@ 
+/* Verify if call-graph profile sections are created
+   with -fcallgraph-profiles-sections. */
+/* { dg-options "-O2 -fcallgraph-profiles-sections -ffunction-sections --save-temps" } */
+
+int __attribute__ ((noinline))
+foo ()
+{
+  return 1;
+}
+
+int __attribute__ ((noinline))
+bar ()
+{
+  return 0;
+}
+
+int main ()
+{
+  int sum;
+  for (int i = 0; i< 1000; i++)
+    {
+      sum = foo () + bar();
+    }
+  return sum * bar ();
+}
+
+/* { dg-final-use { scan-assembler "\.gnu\.callgraph\.text\.main" } } */
+/* { dg-final-use { scan-assembler "\.string \"1000\"" } } */
+/* { dg-final-use { cleanup-saved-temps } }  */
Index: tree-optimize.c
===================================================================
--- tree-optimize.c	(revision 179098)
+++ tree-optimize.c	(working copy)
@@ -48,11 +48,17 @@  along with GCC; see the file COPYING3.  If not see
 #include "plugin.h"
 #include "regset.h"	/* FIXME: For reg_obstack.  */
 
+/* Decides if the cgraph callee edges are being cleaned up for the
+   last time.  */
+bool cgraph_callee_edges_final_cleanup = false;
+
 /* Gate: execute, or not, all of the non-trivial optimizations.  */
 
 static bool
 gate_all_optimizations (void)
 {
+  /* The cgraph callee edges can be cleaned up for the last time.  */
+  cgraph_callee_edges_final_cleanup = true;
   return (optimize >= 1
 	  /* Don't bother doing anything if the program has errors.
 	     We have to pass down the queue if we already went into SSA */