diff mbox series

[V2] Generalized value pass-through for self-recursive function (ipa/pr93203)

Message ID BYAPR01MB48698368F177337119EF9040F7090@BYAPR01MB4869.prod.exchangelabs.com
State New
Headers show
Series [V2] Generalized value pass-through for self-recursive function (ipa/pr93203) | expand

Commit Message

Feng Xue OS Jan. 25, 2020, 1:50 p.m. UTC
Made some changes.

Feng

Comments

Martin Jambor Feb. 5, 2020, 5:27 p.m. UTC | #1
Hi,

On Sat, Jan 25 2020, Feng Xue OS wrote:
> Made some changes.
>
> Feng
>
> ________________________________________
> From: Feng Xue OS
> Sent: Saturday, January 25, 2020 5:54 PM
> To: mjambor@suse.cz; Jan Hubicka; gcc-patches@gcc.gnu.org
> Subject: [PATCH] Generalized value pass-through for self-recursive function (ipa/pr93203)
>
> Besides simple pass-through (aggregate) jump function, arithmetic (aggregate)
> jump function could also bring same (aggregate) value as parameter passed-in
> for self-feeding recursive call.  For example,
>
>       f1 (int i)    /*  normal jump function */
>          {
>             f1 (i & 1);
>          }
>
> Suppose i is 0, recursive propagation via (i & 1) also gets 0, which
> can be seen as a simple pass-through of i.
>
>       f2 (int *p)  /* aggregate jump function */
>          {
>             int t = *p & 1;
>             f2 (&t);
>          }
> Likewise, if *p is 0, (*p & 1) is also 0, and &t is an aggregate simple
> pass-through of p.
>
> This patch is to support these two kinds of value pass-through.
> Bootstrapped/regtested on x86_64-linux and aarch64-linux.

sorry for the delay in the review.  As far as I am concerned, I am OK
with the patch but please see the few comments below:

> ---
> 2020-01-25  Feng Xue  <fxue@os.amperecomputing.com>
>
>         PR ipa/93203
>         * ipa-cp.c (ipcp_lattice::add_value): Add source with same call
>         edge but different source value.
>         (adjust_callers_for_value_intersection): New function.
>         (gather_edges_for_value): Adjust order of callers to let a
>         non-self-recursive caller be the first element.
>         (self_recursive_pass_through_p): Add a new parameter simple, and
>         check generalized self-recursive pass-through jump function.
>         (self_recursive_agg_pass_through_p): Likewise.
>         (find_more_scalar_values_for_callers_subset): Compute value from
>         pass-through jump function for self-recursive.
>         (intersect_with_plats): Remove code of itersection with unknown
>         place holder value.
>         (intersect_with_agg_replacements): Likewise.
>         (intersect_aggregates_with_edge): Deduce with from pass-through
>         jump function for self-recursive.
>         (decide_whether_version_node): Remove dead callers and adjust
>         order to let a non-self-recursive caller be the first element.
>
> From 74aef0cd2f40ff828a4b2abcbbdbbf4b1aea1fcf Mon Sep 17 00:00:00 2001
> From: Feng Xue <fxue@os.amperecomputing.com>
> Date: Tue, 21 Jan 2020 20:53:38 +0800
> Subject: [PATCH] Generalized value pass-through for self-recusive function
>
> ---
>  gcc/ipa-cp.c                       | 195 ++++++++++++++++++-----------
>  gcc/testsuite/g++.dg/ipa/pr93203.C |  95 ++++++++++++++
>  gcc/testsuite/gcc.dg/ipa/ipcp-1.c  |   2 +-
>  3 files changed, 216 insertions(+), 76 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/ipa/pr93203.C
>
> diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
> index 17da1d8e8a7..64d23a34292 100644
> --- a/gcc/ipa-cp.c
> +++ b/gcc/ipa-cp.c

...

> @@ -4817,19 +4867,12 @@ intersect_with_plats (class ipcp_param_lattices *plats,
>  	    break;
>  	  if (aglat->offset - offset == item->offset)
>  	    {
> -	      gcc_checking_assert (item->value);

I've been staring at this for quite a while, trying to figure out how
your patch can put NULL here before I realized it was just a clean-up
:-)  Sending such changes independently or pointing them out in the
email/ChangeLog makes review easier.

>  	      if (aglat->is_single_const ())
>  		{
>  		  tree value = aglat->values->value;
>  
>  		  if (values_equal_for_ipcp_p (item->value, value))
>  		    found = true;
> -		  else if (item->value == error_mark_node)
> -		    {
> -		      /* Replace unknown place holder value with real one.  */
> -		      item->value = value;
> -		      found = true;
> -		    }
>  		}
>  	      break;
>  	    }

...

> @@ -5564,7 +5610,6 @@ decide_whether_version_node (struct cgraph_node *node)
>  	}
>        clone = create_specialized_node (node, known_csts, known_contexts,
>  				       aggvals, callers);
> -      info = IPA_NODE_REF (node);

please either drop this change or change it to:

   gcc_checking_assert (info == IPA_NODE_REF (node));

this line of code was actually necessary when adding nodes possibly
invalidated addresses of all summaries - like fast_function_summary
classes still do.  So if we ever decide to use fast summaries we need a
test to remind us that info address must be obtained again.

Thanks for working on this!

Martin
Feng Xue OS Feb. 10, 2020, 3:28 a.m. UTC | #2
>> -           gcc_checking_assert (item->value);

> I've been staring at this for quite a while, trying to figure out how
> your patch can put NULL here before I realized it was just a clean-up
> :-)  Sending such changes independently or pointing them out in the
> email/ChangeLog makes review easier.

Ok. I'll add some description on this cleanup on ChangeLog.

>> @@ -5564,7 +5610,6 @@ decide_whether_version_node (struct cgraph_node *node)
>>       }
>>        clone = create_specialized_node (node, known_csts, known_contexts,
>>                                      aggvals, callers);
>> -      info = IPA_NODE_REF (node);

> please either drop this change or change it to:
>
>   gcc_checking_assert (info == IPA_NODE_REF (node));

> this line of code was actually necessary when adding nodes possibly
> invalidated addresses of all summaries - like fast_function_summary
> classes still do.  So if we ever decide to use fast summaries we need a
> test to remind us that info address must be obtained again.
Ok. I'm not aware that, will keep the line as original.

Thanks,
Feng
Tamar Christina Feb. 11, 2020, 10:05 a.m. UTC | #3
Hi Feng,

This patch (commit a0f6a8cb414b687f22c9011a894d5e8e398c4be0) is causing ICEs in the GCC and perlbench benchmark in Spec2017.

during IPA pass: cp
lto1: internal compiler error: in find_more_scalar_values_for_callers_subset, at ipa-cp.c:4709
0x1698187 find_more_scalar_values_for_callers_subset
	../.././gcc/ipa-cp.c:4709
0x169f7d3 decide_about_value<tree_node*>
	../.././gcc/ipa-cp.c:5490
0x169fdc3 decide_whether_version_node
	../.././gcc/ipa-cp.c:5537
0x169fdc3 ipcp_decision_stage
	../.././gcc/ipa-cp.c:5718
0x169fdc3 ipcp_driver
	../.././gcc/ipa-cp.c:5901
Please submit a full bug report,

Thanks,
Tamar

> -----Original Message-----
> From: gcc-patches-owner@gcc.gnu.org <gcc-patches-owner@gcc.gnu.org>
> On Behalf Of Feng Xue OS
> Sent: Monday, February 10, 2020 03:29
> To: Martin Jambor <mjambor@suse.cz>; Jan Hubicka <hubicka@ucw.cz>;
> gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH V2] Generalized value pass-through for self-recursive
> function (ipa/pr93203)
> 
> >> -           gcc_checking_assert (item->value);
> 
> > I've been staring at this for quite a while, trying to figure out how
> > your patch can put NULL here before I realized it was just a clean-up
> > :-)  Sending such changes independently or pointing them out in the
> > email/ChangeLog makes review easier.
> 
> Ok. I'll add some description on this cleanup on ChangeLog.
> 
> >> @@ -5564,7 +5610,6 @@ decide_whether_version_node (struct
> cgraph_node *node)
> >>       }
> >>        clone = create_specialized_node (node, known_csts, known_contexts,
> >>                                      aggvals, callers);
> >> -      info = IPA_NODE_REF (node);
> 
> > please either drop this change or change it to:
> >
> >   gcc_checking_assert (info == IPA_NODE_REF (node));
> 
> > this line of code was actually necessary when adding nodes possibly
> > invalidated addresses of all summaries - like fast_function_summary
> > classes still do.  So if we ever decide to use fast summaries we need
> > a test to remind us that info address must be obtained again.
> Ok. I'm not aware that, will keep the line as original.
> 
> Thanks,
> Feng
Feng Xue OS Feb. 11, 2020, 1:10 p.m. UTC | #4
Hi Tarmar,

  Since I could not reproduce this ICE with my local testing on Spec2017, would you share your config, or command line options extracted from compilation of perlbench?

Thanks,
Feng
Tamar Christina Feb. 11, 2020, 2:30 p.m. UTC | #5
Hi Feng,

It looks like the option that causes this to trigger is `--param ipa-cp-eval-threshold=1`

So on AArch64 I get it to trigger with -mcpu=native -Ofast -fomit-frame-pointer -flto --param ipa-cp-eval-threshold=1

Kind Regards,
Tamar

> -----Original Message-----
> From: Feng Xue OS <fxue@os.amperecomputing.com>
> Sent: Tuesday, February 11, 2020 13:11
> To: Tamar Christina <Tamar.Christina@arm.com>; Martin Jambor
> <mjambor@suse.cz>; Jan Hubicka <hubicka@ucw.cz>; gcc-
> patches@gcc.gnu.org
> Cc: nd <nd@arm.com>
> Subject: Re: [PATCH V2] Generalized value pass-through for self-recursive
> function (ipa/pr93203)
> 
> Hi Tarmar,
> 
>   Since I could not reproduce this ICE with my local testing on Spec2017, would
> you share your config, or command line options extracted from compilation
> of perlbench?
> 
> Thanks,
> Feng
> 
> ________________________________________
> From: gcc-patches-owner@gcc.gnu.org <gcc-patches-owner@gcc.gnu.org>
> on behalf of Tamar Christina <Tamar.Christina@arm.com>
> Sent: Tuesday, February 11, 2020 6:05 PM
> To: Feng Xue OS; Martin Jambor; Jan Hubicka; gcc-patches@gcc.gnu.org
> Cc: nd
> Subject: RE: [PATCH V2] Generalized value pass-through for self-recursive
> function (ipa/pr93203)
> 
> Hi Feng,
> 
> This patch (commit a0f6a8cb414b687f22c9011a894d5e8e398c4be0) is causing
> ICEs in the GCC and perlbench benchmark in Spec2017.
> 
> during IPA pass: cp
> lto1: internal compiler error: in find_more_scalar_values_for_callers_subset,
> at ipa-cp.c:4709
> 0x1698187 find_more_scalar_values_for_callers_subset
>         ../.././gcc/ipa-cp.c:4709
> 0x169f7d3 decide_about_value<tree_node*>
>         ../.././gcc/ipa-cp.c:5490
> 0x169fdc3 decide_whether_version_node
>         ../.././gcc/ipa-cp.c:5537
> 0x169fdc3 ipcp_decision_stage
>         ../.././gcc/ipa-cp.c:5718
> 0x169fdc3 ipcp_driver
>         ../.././gcc/ipa-cp.c:5901
> Please submit a full bug report,
> 
> Thanks,
> Tamar
> 
> > -----Original Message-----
> > From: gcc-patches-owner@gcc.gnu.org <gcc-patches-owner@gcc.gnu.org>
> On
> > Behalf Of Feng Xue OS
> > Sent: Monday, February 10, 2020 03:29
> > To: Martin Jambor <mjambor@suse.cz>; Jan Hubicka <hubicka@ucw.cz>;
> > gcc-patches@gcc.gnu.org
> > Subject: Re: [PATCH V2] Generalized value pass-through for
> > self-recursive function (ipa/pr93203)
> >
> > >> -           gcc_checking_assert (item->value);
> >
> > > I've been staring at this for quite a while, trying to figure out
> > > how your patch can put NULL here before I realized it was just a
> > > clean-up
> > > :-)  Sending such changes independently or pointing them out in the
> > > email/ChangeLog makes review easier.
> >
> > Ok. I'll add some description on this cleanup on ChangeLog.
> >
> > >> @@ -5564,7 +5610,6 @@ decide_whether_version_node (struct
> > cgraph_node *node)
> > >>       }
> > >>        clone = create_specialized_node (node, known_csts,
> known_contexts,
> > >>                                      aggvals, callers);
> > >> -      info = IPA_NODE_REF (node);
> >
> > > please either drop this change or change it to:
> > >
> > >   gcc_checking_assert (info == IPA_NODE_REF (node));
> >
> > > this line of code was actually necessary when adding nodes possibly
> > > invalidated addresses of all summaries - like fast_function_summary
> > > classes still do.  So if we ever decide to use fast summaries we
> > > need a test to remind us that info address must be obtained again.
> > Ok. I'm not aware that, will keep the line as original.
> >
> > Thanks,
> > Feng
Feng Xue OS Feb. 13, 2020, 5:39 a.m. UTC | #6
I've submitted a bug tracker, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93707.

The root cause is that for a self-recursive function, a for-all-contexts clone could generate
an edge whose callee is not the function. Therefore, to check whether an edge stands for a
recursive call during cloning, we should not use ordinary way of comparing caller and callee
of the edge.

Bootstrapped/regtested on x86_64-linux and aarch64-linux, and also tested Spec 2017 with
option "--param ipa-cp-eval-threshold=1". 

Feng
---
2020-02-13  Feng Xue  <fxue@os.amperecomputing.com>

        PR ipa/93707
        * ipa-cp.c (self_recursive_pass_through_p): Add a new parameter "node",
        and check recursiveness by comparing "node" and cs->caller.
        (self_recursive_agg_pass_through_p): Likewise.
        (find_more_scalar_values_for_callers_subset): Add "node" argument to
        self_recursive_pass_through_p call.
        (intersect_aggregates_with_edge): Add a new parameter "node", and add
        "node" argument to self_cursive_agg_pass_through_p call.
        (find_aggregate_values_for_callers_subset): Add "node" argument to
        self_recursive_pass_through_p and intersect_aggregates_with_edge calls.
        (cgraph_edge_brings_all_agg_vals_for_node): Add "node" argument to
        intersect_aggregates_with_edge call.

> ________________________________________
> From: gcc-patches-owner@gcc.gnu.org <gcc-patches-owner@gcc.gnu.org>
> on behalf of Tamar Christina <Tamar.Christina@arm.com>
> Sent: Tuesday, February 11, 2020 6:05 PM
> To: Feng Xue OS; Martin Jambor; Jan Hubicka; gcc-patches@gcc.gnu.org
> Cc: nd
> Subject: RE: [PATCH V2] Generalized value pass-through for self-recursive
> function (ipa/pr93203)
>
> Hi Feng,
>
> This patch (commit a0f6a8cb414b687f22c9011a894d5e8e398c4be0) is causing
> ICEs in the GCC and perlbench benchmark in Spec2017.
>
> during IPA pass: cp
> lto1: internal compiler error: in find_more_scalar_values_for_callers_subset,
> at ipa-cp.c:4709
> 0x1698187 find_more_scalar_values_for_callers_subset
>         ../.././gcc/ipa-cp.c:4709
> 0x169f7d3 decide_about_value<tree_node*>
>         ../.././gcc/ipa-cp.c:5490
> 0x169fdc3 decide_whether_version_node
>         ../.././gcc/ipa-cp.c:5537
> 0x169fdc3 ipcp_decision_stage
>         ../.././gcc/ipa-cp.c:5718
> 0x169fdc3 ipcp_driver
>         ../.././gcc/ipa-cp.c:5901
> Please submit a full bug report,
>
> Thanks,
> Tamar
>
Tamar Christina Feb. 17, 2020, 8:44 a.m. UTC | #7
Hi Feng,

Thanks! The patch seems to work.

Hopefully it gets reviewed soon so we can fix the two benchmarks 😊

Thanks,
Tamar

> -----Original Message-----
> From: Feng Xue OS <fxue@os.amperecomputing.com>
> Sent: Thursday, February 13, 2020 05:40
> To: Tamar Christina <Tamar.Christina@arm.com>; Martin Jambor
> <mjambor@suse.cz>; Jan Hubicka <hubicka@ucw.cz>; gcc-
> patches@gcc.gnu.org
> Cc: nd <nd@arm.com>
> Subject: [PATCH] Fix bug in recursiveness check for function to be cloned
> (ipa/pr93707)
> 
> I've submitted a bug tracker,
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93707.
> 
> The root cause is that for a self-recursive function, a for-all-contexts clone
> could generate an edge whose callee is not the function. Therefore, to check
> whether an edge stands for a recursive call during cloning, we should not use
> ordinary way of comparing caller and callee of the edge.
> 
> Bootstrapped/regtested on x86_64-linux and aarch64-linux, and also tested
> Spec 2017 with option "--param ipa-cp-eval-threshold=1".
> 
> Feng
> ---
> 2020-02-13  Feng Xue  <fxue@os.amperecomputing.com>
> 
>         PR ipa/93707
>         * ipa-cp.c (self_recursive_pass_through_p): Add a new parameter
> "node",
>         and check recursiveness by comparing "node" and cs->caller.
>         (self_recursive_agg_pass_through_p): Likewise.
>         (find_more_scalar_values_for_callers_subset): Add "node" argument to
>         self_recursive_pass_through_p call.
>         (intersect_aggregates_with_edge): Add a new parameter "node", and
> add
>         "node" argument to self_cursive_agg_pass_through_p call.
>         (find_aggregate_values_for_callers_subset): Add "node" argument to
>         self_recursive_pass_through_p and intersect_aggregates_with_edge
> calls.
>         (cgraph_edge_brings_all_agg_vals_for_node): Add "node" argument to
>         intersect_aggregates_with_edge call.
> 
> > ________________________________________
> > From: gcc-patches-owner@gcc.gnu.org <gcc-patches-owner@gcc.gnu.org>
> on
> > behalf of Tamar Christina <Tamar.Christina@arm.com>
> > Sent: Tuesday, February 11, 2020 6:05 PM
> > To: Feng Xue OS; Martin Jambor; Jan Hubicka; gcc-patches@gcc.gnu.org
> > Cc: nd
> > Subject: RE: [PATCH V2] Generalized value pass-through for
> > self-recursive function (ipa/pr93203)
> >
> > Hi Feng,
> >
> > This patch (commit a0f6a8cb414b687f22c9011a894d5e8e398c4be0) is
> > causing ICEs in the GCC and perlbench benchmark in Spec2017.
> >
> > during IPA pass: cp
> > lto1: internal compiler error: in
> > find_more_scalar_values_for_callers_subset,
> > at ipa-cp.c:4709
> > 0x1698187 find_more_scalar_values_for_callers_subset
> >         ../.././gcc/ipa-cp.c:4709
> > 0x169f7d3 decide_about_value<tree_node*>
> >         ../.././gcc/ipa-cp.c:5490
> > 0x169fdc3 decide_whether_version_node
> >         ../.././gcc/ipa-cp.c:5537
> > 0x169fdc3 ipcp_decision_stage
> >         ../.././gcc/ipa-cp.c:5718
> > 0x169fdc3 ipcp_driver
> >         ../.././gcc/ipa-cp.c:5901
> > Please submit a full bug report,
> >
> > Thanks,
> > Tamar
> >
Feng Xue OS Feb. 18, 2020, 3:16 p.m. UTC | #8
Thanks,

Feng
Martin Jambor Feb. 19, 2020, 4:28 p.m. UTC | #9
Hi,

On Thu, Feb 13 2020, Feng Xue OS wrote:
> I've submitted a bug tracker, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93707.
>
> The root cause is that for a self-recursive function, a for-all-contexts clone could generate
> an edge whose callee is not the function. Therefore, to check whether an edge stands for a
> recursive call during cloning, we should not use ordinary way of comparing caller and callee
> of the edge.
>
> Bootstrapped/regtested on x86_64-linux and aarch64-linux, and also tested Spec 2017 with
> option "--param ipa-cp-eval-threshold=1". 
>
> Feng
> ---
> 2020-02-13  Feng Xue  <fxue@os.amperecomputing.com>
>
>         PR ipa/93707
>         * ipa-cp.c (self_recursive_pass_through_p): Add a new parameter "node",
>         and check recursiveness by comparing "node" and cs->caller.
>         (self_recursive_agg_pass_through_p): Likewise.
>         (find_more_scalar_values_for_callers_subset): Add "node" argument to
>         self_recursive_pass_through_p call.
>         (intersect_aggregates_with_edge): Add a new parameter "node", and add
>         "node" argument to self_cursive_agg_pass_through_p call.
>         (find_aggregate_values_for_callers_subset): Add "node" argument to
>         self_recursive_pass_through_p and intersect_aggregates_with_edge calls.
>         (cgraph_edge_brings_all_agg_vals_for_node): Add "node" argument to
>         intersect_aggregates_with_edge call.

first, thanks for a very nice testcase and for working on this.

However, I believe that his approach mostly papers over a bug that
happens earlier, specifically that cgraph_edge_brings_value_p returned
true for the self-recursive edge from all-context clone to itself, even
though when evaluating the second argument.  We assume that all context
clones get at least as many constants as any other potential clone, but
that does not work for self-recursive edges with pass-through parameters
that that just pass along the received constant.

That is how we got a wrong edge in the vector of edges bringing the
constant and it is the primary reason why self_recursive_pass_through_p
and its users did not work correctly.

I would therefore like to fix the problem with the following patch.  It
has passed bootstrap and testing on x86_64-linux, LTO bootstrap is
underway.

Honza, is it OK for trunk?  Tamar, can you please double check it fixes
your problem with perlbench?

Thanks,

Martin


ipa-cp: Avoid wrongly gathering self-recursive edges  (PR 93707)

2020-02-18  Martin Jambor  <mjambor@suse.cz>
	    Feng Xue  <fxue@os.amperecomputing.com>

	PR ipa/93707
	* ipa-cp.c (same_node_or_its_all_contexts_clone_p): Replaced with
	new function calls_same_node_or_its_all_contexts_clone_p.
	(cgraph_edge_brings_value_p): Use it.  Fix comment.
	(cgraph_edge_brings_value_p): Likewise.

	testsuite/
	* gcc.dg/ipa/pr93707.c: New test.
---
 gcc/ChangeLog                      |  9 +++++++++
 gcc/ipa-cp.c                       | 29 ++++++++++++++---------------
 gcc/testsuite/ChangeLog            |  6 ++++++
 gcc/testsuite/gcc.dg/ipa/pr93707.c | 29 +++++++++++++++++++++++++++++
 4 files changed, 58 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr93707.c

diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 4f5b72e6994..4609375bf8d 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -4033,15 +4033,23 @@ edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
   src_data->next_clone = dst_edge;
 }
 
-/* Return true is NODE is DEST or its clone for all contexts.  */
+/* Return true is CS calls DEST or its clone for all contexts, except for
+   self-recursive nodes in which it has to be DEST itself.  */
 
 static bool
-same_node_or_its_all_contexts_clone_p (cgraph_node *node, cgraph_node *dest)
+calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest)
 {
-  if (node == dest)
+  enum availability availability;
+  cgraph_node *callee = cs->callee->function_symbol (&availability);
+
+  if (availability <= AVAIL_INTERPOSABLE)
+    return false;
+  if (callee == dest)
     return true;
+  if (cs->caller == callee)
+    return false;
 
-  class ipa_node_params *info = IPA_NODE_REF (node);
+  class ipa_node_params *info = IPA_NODE_REF (callee);
   return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
 }
 
@@ -4053,11 +4061,8 @@ cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
 			    cgraph_node *dest, ipcp_value<tree> *dest_val)
 {
   class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
-  enum availability availability;
-  cgraph_node *real_dest = cs->callee->function_symbol (&availability);
 
-  if (availability <= AVAIL_INTERPOSABLE
-      || !same_node_or_its_all_contexts_clone_p (real_dest, dest)
+  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest)
       || caller_info->node_dead)
     return false;
 
@@ -4076,9 +4081,6 @@ cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
     }
   else
     {
-      /* At the moment we do not propagate over arithmetic jump functions in
-	 SCCs, so it is safe to detect self-feeding recursive calls in this
-	 way.  */
       if (src->val == dest_val)
 	return true;
 
@@ -4113,11 +4115,8 @@ cgraph_edge_brings_value_p (cgraph_edge *cs,
 			    ipcp_value<ipa_polymorphic_call_context> *)
 {
   class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
-  enum availability avail;
-  cgraph_node *real_dest = cs->callee->function_symbol (&avail);
 
-  if (avail <= AVAIL_INTERPOSABLE
-      || !same_node_or_its_all_contexts_clone_p (real_dest, dest)
+  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest)
       || caller_info->node_dead)
     return false;
   if (!src->val)
diff --git a/gcc/testsuite/gcc.dg/ipa/pr93707.c b/gcc/testsuite/gcc.dg/ipa/pr93707.c
new file mode 100644
index 00000000000..e200a3a432b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr93707.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 --param ipa-cp-eval-threshold=1" } */
+
+int foo();
+int data[100];
+
+__attribute__((noinline)) static int recur_fn (int i, int j, int depth)
+{
+   if (depth > 10)
+     return 1;
+
+   data[i + j]++;
+
+   if (depth & 3)
+     recur_fn (i, 1, depth + 1);
+   else
+     recur_fn (i, j & 1, depth + 1);
+
+   foo();
+
+   return i + j;
+}
+
+int caller (int v, int depth)
+{
+  recur_fn (1, v, depth);
+
+  return 0;
+}
Feng Xue OS Feb. 20, 2020, 3:36 a.m. UTC | #10
This is a simpel and nice fix, but could suppress some CP opportunities for
self-recursive call.  Using the test case as example, the first should be a
for-all-context clone, and the call "recur_fn (i, 1, depth + 1)" is replaced with
a newly created recursive node. Thus, in the next round of CP iteration, the
way to do CP for the 2nd arugment "1" is blocked, because its coming edge
can not pass check by cgraph_edge_brings_value_p().

+__attribute__((noinline)) static int recur_fn (int i, int j, int depth)
+{
+   if (depth > 10)
+     return 1;
+
+   data[i + j]++;
+
+   if (depth & 3)
+     recur_fn (i, 1, depth + 1);
+   else
+     recur_fn (i, j & 1, depth + 1);
+
+   foo();
+
+   return i + j;
+}
+
+int caller (int v, int depth)
+{
+  recur_fn (1, v, depth);
+
+  return 0;
+}


>However, I believe that his approach mostly papers over a bug that
>happens earlier, specifically that cgraph_edge_brings_value_p returned
>true for the self-recursive edge from all-context clone to itself, even
>though when evaluating the second argument.  We assume that all context
>clones get at least as many constants as any other potential clone, but
>that does not work for self-recursive edges with pass-through parameters
>that that just pass along the received constant.

The following check on value in cgraph_edge_brings_value_p could ensure
whether the value can arrive the dest node or not. If the value is a constant
without source, as above example "1", this is allowed. Otherwise, code snippet
enclosed by "if (caller_info->ipcp_orig_node)" could capture for-all-context clone.

Thanks,
Feng
Tamar Christina Feb. 20, 2020, 12:57 p.m. UTC | #11
Hi Martin,

> Honza, is it OK for trunk?  Tamar, can you please double check it fixes
> your problem with perlbench?
>

Thanks for working on this! Yes it does seem to fix the regression as well.

Cheers,
Tamar

> Thanks,
>
> Martin
>
>
> ipa-cp: Avoid wrongly gathering self-recursive edges  (PR 93707)
>
> 2020-02-18  Martin Jambor  <mjambor@suse.cz>
>            Feng Xue  <fxue@os.amperecomputing.com>
>
>        PR ipa/93707
>        * ipa-cp.c (same_node_or_its_all_contexts_clone_p): Replaced with
>        new function calls_same_node_or_its_all_contexts_clone_p.
>        (cgraph_edge_brings_value_p): Use it.  Fix comment.
>        (cgraph_edge_brings_value_p): Likewise.
>
>        testsuite/
>        * gcc.dg/ipa/pr93707.c: New test.
> ---
>  gcc/ChangeLog                      |  9 +++++++++
>  gcc/ipa-cp.c                       | 29 ++++++++++++++---------------
>  gcc/testsuite/ChangeLog            |  6 ++++++
>  gcc/testsuite/gcc.dg/ipa/pr93707.c | 29
> +++++++++++++++++++++++++++++
>  4 files changed, 58 insertions(+), 15 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/ipa/pr93707.c
>
> diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
> index 4f5b72e6994..4609375bf8d 100644
> --- a/gcc/ipa-cp.c
> +++ b/gcc/ipa-cp.c
> @@ -4033,15 +4033,23 @@ edge_clone_summary_t::duplicate
> (cgraph_edge *src_edge, cgraph_edge *dst_edge,
>    src_data->next_clone = dst_edge;
>  }
>
> -/* Return true is NODE is DEST or its clone for all contexts.  */
> +/* Return true is CS calls DEST or its clone for all contexts, except for
> +   self-recursive nodes in which it has to be DEST itself.  */
>
>  static bool
> -same_node_or_its_all_contexts_clone_p (cgraph_node *node,
> cgraph_node *dest)
> +calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs,
> cgraph_node *dest)
>  {
> -  if (node == dest)
> +  enum availability availability;
> +  cgraph_node *callee = cs->callee->function_symbol (&availability);
> +
> +  if (availability <= AVAIL_INTERPOSABLE)
> +    return false;
> +  if (callee == dest)
>      return true;
> +  if (cs->caller == callee)
> +    return false;
>
> -  class ipa_node_params *info = IPA_NODE_REF (node);
> +  class ipa_node_params *info = IPA_NODE_REF (callee);
>    return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
>  }
>
> @@ -4053,11 +4061,8 @@ cgraph_edge_brings_value_p (cgraph_edge *cs,
> ipcp_value_source<tree> *src,
>                            cgraph_node *dest, ipcp_value<tree> *dest_val)
>  {
>    class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
> -  enum availability availability;
> -  cgraph_node *real_dest = cs->callee->function_symbol (&availability);
>
> -  if (availability <= AVAIL_INTERPOSABLE
> -      || !same_node_or_its_all_contexts_clone_p (real_dest, dest)
> +  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest)
>        || caller_info->node_dead)
>      return false;
>
> @@ -4076,9 +4081,6 @@ cgraph_edge_brings_value_p (cgraph_edge *cs,
> ipcp_value_source<tree> *src,
>      }
>    else
>      {
> -      /* At the moment we do not propagate over arithmetic jump functions in
> -      SCCs, so it is safe to detect self-feeding recursive calls in this
> -      way.  */
>        if (src->val == dest_val)
>        return true;
>
> @@ -4113,11 +4115,8 @@ cgraph_edge_brings_value_p (cgraph_edge *cs,
>                            ipcp_value<ipa_polymorphic_call_context> *)
>  {
>    class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
> -  enum availability avail;
> -  cgraph_node *real_dest = cs->callee->function_symbol (&avail);
>
> -  if (avail <= AVAIL_INTERPOSABLE
> -      || !same_node_or_its_all_contexts_clone_p (real_dest, dest)
> +  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest)
>        || caller_info->node_dead)
>      return false;
>    if (!src->val)
> diff --git a/gcc/testsuite/gcc.dg/ipa/pr93707.c
> b/gcc/testsuite/gcc.dg/ipa/pr93707.c
> new file mode 100644
> index 00000000000..e200a3a432b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/ipa/pr93707.c
> @@ -0,0 +1,29 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 --param ipa-cp-eval-threshold=1" } */
> +
> +int foo();
> +int data[100];
> +
> +__attribute__((noinline)) static int recur_fn (int i, int j, int depth)
> +{
> +   if (depth > 10)
> +     return 1;
> +
> +   data[i + j]++;
> +
> +   if (depth & 3)
> +     recur_fn (i, 1, depth + 1);
> +   else
> +     recur_fn (i, j & 1, depth + 1);
> +
> +   foo();
> +
> +   return i + j;
> +}
> +
> +int caller (int v, int depth)
> +{
> +  recur_fn (1, v, depth);
> +
> +  return 0;
> +}
> --
> 2.25.0
Martin Jambor Feb. 21, 2020, 6:15 p.m. UTC | #12
Hi,

On Thu, Feb 20 2020, Feng Xue OS wrote:
> This is a simpel and nice fix, but could suppress some CP opportunities for
> self-recursive call.  Using the test case as example, the first should be a
> for-all-context clone, and the call "recur_fn (i, 1, depth + 1)" is replaced with
> a newly created recursive node. Thus, in the next round of CP iteration, the
> way to do CP for the 2nd arugment "1" is blocked, because its coming edge
> can not pass check by cgraph_edge_brings_value_p().
>
> +__attribute__((noinline)) static int recur_fn (int i, int j, int depth)
> +{
> +   if (depth > 10)
> +     return 1;
> +
> +   data[i + j]++;
> +
> +   if (depth & 3)
> +     recur_fn (i, 1, depth + 1);
> +   else
> +     recur_fn (i, j & 1, depth + 1);
> +
> +   foo();
> +
> +   return i + j;
> +}
> +
> +int caller (int v, int depth)
> +{
> +  recur_fn (1, v, depth);
> +
> +  return 0;
> +}
>
>
>>However, I believe that his approach mostly papers over a bug that
>>happens earlier, specifically that cgraph_edge_brings_value_p returned
>>true for the self-recursive edge from all-context clone to itself, even
>>though when evaluating the second argument.  We assume that all context
>>clones get at least as many constants as any other potential clone, but
>>that does not work for self-recursive edges with pass-through parameters
>>that that just pass along the received constant.
>
> The following check on value in cgraph_edge_brings_value_p could ensure
> whether the value can arrive the dest node or not. If the value is a constant
> without source, as above example "1", this is allowed. Otherwise, code snippet
> enclosed by "if (caller_info->ipcp_orig_node)" could capture for-all-context clone.

there has not been any "following check" in your email but I believe I
understand what you mean, and I added such check to my patch so that the
edge carrying the non-pass through jump function was accepted by the
cgraph_edge_brings_value_p predicate.

However, that lead to the same assert in
find_more_scalar_values_for_callers_subset because on that edge it tried
to compute the depth + 1 value before it had any value to calculate it
from.

So after staring at the problem for another while I realized that the
users self_recursive_pass_through_p and
self_recursive_agg_pass_through_p would be OK if it returned false for
self-recursive calls from/to a node which is already a clone - clones
have their known constant values set at the point of their creation -
and that doing so avoids this problem.  So that is what the patch below
does.  I have still kept the cgraph_edge_brings_value_p hunks too, so
that edges are collected reliably.

Bootstrapped and tested on an x86_64-linux, LTO bootstrap underway.

What do you think?

Martin


2020-02-21  Martin Jambor  <mjambor@suse.cz>
	    Feng Xue  <fxue@os.amperecomputing.com>

	PR ipa/93707
	* ipa-cp.c (same_node_or_its_all_contexts_clone_p): Replaced with
	new function calls_same_node_or_its_all_contexts_clone_p.
	(cgraph_edge_brings_value_p): Use it.
	(cgraph_edge_brings_value_p): Likewise.
	(self_recursive_pass_through_p): Return false if caller is a clone.
	(self_recursive_agg_pass_through_p): Likewise.

	testsuite/
	* gcc.dg/ipa/pr93707.c: New test.
---
 gcc/ChangeLog                      | 11 +++++++++
 gcc/ipa-cp.c                       | 38 +++++++++++++++++-------------
 gcc/testsuite/ChangeLog            |  6 +++++
 gcc/testsuite/gcc.dg/ipa/pr93707.c | 31 ++++++++++++++++++++++++
 4 files changed, 69 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr93707.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7c481407de9..a965cae4f07 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2020-02-21  Martin Jambor  <mjambor@suse.cz>
+	    Feng Xue  <fxue@os.amperecomputing.com>
+
+	PR ipa/93707
+	* ipa-cp.c (same_node_or_its_all_contexts_clone_p): Replaced with
+	new function calls_same_node_or_its_all_contexts_clone_p.
+	(cgraph_edge_brings_value_p): Use it.
+	(cgraph_edge_brings_value_p): Likewise.
+	(self_recursive_pass_through_p): Return false if caller is a clone.
+	(self_recursive_agg_pass_through_p): Likewise.
+
 2020-02-17  Richard Biener  <rguenther@suse.de>
 
 	PR c/86134
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 4f5b72e6994..aa228df1204 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -4033,15 +4033,24 @@ edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
   src_data->next_clone = dst_edge;
 }
 
-/* Return true is NODE is DEST or its clone for all contexts.  */
+/* Return true is CS calls DEST or its clone for all contexts, except for
+   self-recursive nodes in which it has to be DEST itself.  */
 
 static bool
-same_node_or_its_all_contexts_clone_p (cgraph_node *node, cgraph_node *dest)
+calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
+					     bool allow_recursion_to_clone)
 {
-  if (node == dest)
+  enum availability availability;
+  cgraph_node *callee = cs->callee->function_symbol (&availability);
+
+  if (availability <= AVAIL_INTERPOSABLE)
+    return false;
+  if (callee == dest)
     return true;
+  if (!allow_recursion_to_clone && cs->caller == callee)
+    return false;
 
-  class ipa_node_params *info = IPA_NODE_REF (node);
+  class ipa_node_params *info = IPA_NODE_REF (callee);
   return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
 }
 
@@ -4053,11 +4062,8 @@ cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
 			    cgraph_node *dest, ipcp_value<tree> *dest_val)
 {
   class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
-  enum availability availability;
-  cgraph_node *real_dest = cs->callee->function_symbol (&availability);
 
-  if (availability <= AVAIL_INTERPOSABLE
-      || !same_node_or_its_all_contexts_clone_p (real_dest, dest)
+  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
       || caller_info->node_dead)
     return false;
 
@@ -4076,9 +4082,6 @@ cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
     }
   else
     {
-      /* At the moment we do not propagate over arithmetic jump functions in
-	 SCCs, so it is safe to detect self-feeding recursive calls in this
-	 way.  */
       if (src->val == dest_val)
 	return true;
 
@@ -4113,11 +4116,8 @@ cgraph_edge_brings_value_p (cgraph_edge *cs,
 			    ipcp_value<ipa_polymorphic_call_context> *)
 {
   class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
-  enum availability avail;
-  cgraph_node *real_dest = cs->callee->function_symbol (&avail);
 
-  if (avail <= AVAIL_INTERPOSABLE
-      || !same_node_or_its_all_contexts_clone_p (real_dest, dest)
+  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
       || caller_info->node_dead)
     return false;
   if (!src->val)
@@ -4630,7 +4630,9 @@ self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
       && availability > AVAIL_INTERPOSABLE
       && jfunc->type == IPA_JF_PASS_THROUGH
       && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
-      && ipa_get_jf_pass_through_formal_id (jfunc) == i)
+      && ipa_get_jf_pass_through_formal_id (jfunc) == i
+      && IPA_NODE_REF (cs->caller)
+      && !IPA_NODE_REF (cs->caller)->ipcp_orig_node)
     return true;
   return false;
 }
@@ -4651,7 +4653,9 @@ self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
       && jfunc->offset == jfunc->value.load_agg.offset
       && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
       && jfunc->value.pass_through.formal_id == i
-      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type))
+      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
+      && IPA_NODE_REF (cs->caller)
+      && !IPA_NODE_REF (cs->caller)->ipcp_orig_node)
     return true;
   return false;
 }
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b326529ac75..e8c9f197e42 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2020-02-18  Martin Jambor  <mjambor@suse.cz>
+	    Feng Xue  <fxue@os.amperecomputing.com>
+
+	PR ipa/93707
+	* gcc.dg/ipa/pr93707.c: New test.
+
 2020-02-17  Richard Biener  <rguenther@suse.de>
 
 	PR c/86134
diff --git a/gcc/testsuite/gcc.dg/ipa/pr93707.c b/gcc/testsuite/gcc.dg/ipa/pr93707.c
new file mode 100644
index 00000000000..685fae45020
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr93707.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 --param ipa-cp-eval-threshold=1 -fdump-ipa-cp" } */
+
+int foo();
+int data[100];
+
+__attribute__((noinline)) static int recur_fn (int i, int j, int depth)
+{
+   if (depth > 10)
+     return 1;
+
+   data[i + j]++;
+
+   if (depth & 3)
+     recur_fn (i, 1, depth + 1);
+   else
+     recur_fn (i, j & 1, depth + 1);
+
+   foo();
+
+   return i + j;
+}
+
+int caller (int v, int depth)
+{
+  recur_fn (1, v, depth);
+
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Clone of recur_fn/" 2 "cp" } } */
Feng Xue OS Feb. 22, 2020, 3:32 a.m. UTC | #13
It is a good solution.

Thanks,
Feng
Martin Jambor Feb. 24, 2020, 3:41 p.m. UTC | #14
Hi,

On Sat, Feb 22 2020, Feng Xue OS wrote:
> It is a good solution.
>

OK, so I'd like to go with my patch as it hopefully keeps some of the
predicates a tiny bit simpler.  I have re-based the patch on today's
trunk and amended function comments as necessary (which I forgot last
week).  The result is below, it has passed bootstrap and testing and
LTO+PGO bootstrap on x86_64-linux.

Honza, is it OK for trunk?

Thanks,

Martin


ipa-cp: Avoid an ICE processing self-recursive cloned edges (PR 93707)

2020-02-24  Martin Jambor  <mjambor@suse.cz>
	    Feng Xue  <fxue@os.amperecomputing.com>

	PR ipa/93707
	* ipa-cp.c (same_node_or_its_all_contexts_clone_p): Replaced with
	new function calls_same_node_or_its_all_contexts_clone_p.
	(cgraph_edge_brings_value_p): Use it.
	(cgraph_edge_brings_value_p): Likewise.
	(self_recursive_pass_through_p): Return false if caller is a clone.
	(self_recursive_agg_pass_through_p): Likewise.

	testsuite/
	* gcc.dg/ipa/pr93707.c: New test.
---
 gcc/ChangeLog                      | 11 ++++++
 gcc/ipa-cp.c                       | 55 +++++++++++++++++-------------
 gcc/testsuite/ChangeLog            |  6 ++++
 gcc/testsuite/gcc.dg/ipa/pr93707.c | 31 +++++++++++++++++
 4 files changed, 79 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr93707.c

diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 1d0c1ac0f35..27c020b8199 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -4035,15 +4035,25 @@ edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
   src_data->next_clone = dst_edge;
 }
 
-/* Return true is NODE is DEST or its clone for all contexts.  */
+/* Return true is CS calls DEST or its clone for all contexts.  When
+   ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
+   edges from/to an all-context clone.  */
 
 static bool
-same_node_or_its_all_contexts_clone_p (cgraph_node *node, cgraph_node *dest)
+calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
+					     bool allow_recursion_to_clone)
 {
-  if (node == dest)
+  enum availability availability;
+  cgraph_node *callee = cs->callee->function_symbol (&availability);
+
+  if (availability <= AVAIL_INTERPOSABLE)
+    return false;
+  if (callee == dest)
     return true;
+  if (!allow_recursion_to_clone && cs->caller == callee)
+    return false;
 
-  class ipa_node_params *info = IPA_NODE_REF (node);
+  class ipa_node_params *info = IPA_NODE_REF (callee);
   return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
 }
 
@@ -4055,11 +4065,8 @@ cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
 			    cgraph_node *dest, ipcp_value<tree> *dest_val)
 {
   class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
-  enum availability availability;
-  cgraph_node *real_dest = cs->callee->function_symbol (&availability);
 
-  if (availability <= AVAIL_INTERPOSABLE
-      || !same_node_or_its_all_contexts_clone_p (real_dest, dest)
+  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
       || caller_info->node_dead)
     return false;
 
@@ -4078,9 +4085,6 @@ cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
     }
   else
     {
-      /* At the moment we do not propagate over arithmetic jump functions in
-	 SCCs, so it is safe to detect self-feeding recursive calls in this
-	 way.  */
       if (src->val == dest_val)
 	return true;
 
@@ -4115,11 +4119,8 @@ cgraph_edge_brings_value_p (cgraph_edge *cs,
 			    ipcp_value<ipa_polymorphic_call_context> *)
 {
   class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
-  enum availability avail;
-  cgraph_node *real_dest = cs->callee->function_symbol (&avail);
 
-  if (avail <= AVAIL_INTERPOSABLE
-      || !same_node_or_its_all_contexts_clone_p (real_dest, dest)
+  if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
       || caller_info->node_dead)
     return false;
   if (!src->val)
@@ -4619,9 +4620,10 @@ create_specialized_node (struct cgraph_node *node,
   return new_node;
 }
 
-/* Return true, if JFUNC, which describes a i-th parameter of call CS, is a
-   pass-through function to itself.  When SIMPLE is true, further check if
-   JFUNC is a simple no-operation pass-through.  */
+/* Return true if JFUNC, which describes a i-th parameter of call CS, is a
+   pass-through function to itself when the cgraph_node involved is not an
+   IPA-CP clone.  When SIMPLE is true, further check if JFUNC is a simple
+   no-operation pass-through.  */
 
 static bool
 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
@@ -4632,15 +4634,18 @@ self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
       && availability > AVAIL_INTERPOSABLE
       && jfunc->type == IPA_JF_PASS_THROUGH
       && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
-      && ipa_get_jf_pass_through_formal_id (jfunc) == i)
+      && ipa_get_jf_pass_through_formal_id (jfunc) == i
+      && IPA_NODE_REF (cs->caller)
+      && !IPA_NODE_REF (cs->caller)->ipcp_orig_node)
     return true;
   return false;
 }
 
-/* Return true, if JFUNC, which describes a part of an aggregate represented
-   or pointed to by the i-th parameter of call CS, is a pass-through function
-   to itself.  When SIMPLE is true, further check if JFUNC is a simple
-   no-operation pass-through.  */
+/* Return true if JFUNC, which describes a part of an aggregate represented or
+   pointed to by the i-th parameter of call CS, is a pass-through function to
+   itself when the cgraph_node involved is not an IPA-CP clone..  When
+   SIMPLE is true, further check if JFUNC is a simple no-operation
+   pass-through.  */
 
 static bool
 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
@@ -4653,7 +4658,9 @@ self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
       && jfunc->offset == jfunc->value.load_agg.offset
       && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
       && jfunc->value.pass_through.formal_id == i
-      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type))
+      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
+      && IPA_NODE_REF (cs->caller)
+      && !IPA_NODE_REF (cs->caller)->ipcp_orig_node)
     return true;
   return false;
 }
diff --git a/gcc/testsuite/gcc.dg/ipa/pr93707.c b/gcc/testsuite/gcc.dg/ipa/pr93707.c
new file mode 100644
index 00000000000..685fae45020
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr93707.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 --param ipa-cp-eval-threshold=1 -fdump-ipa-cp" } */
+
+int foo();
+int data[100];
+
+__attribute__((noinline)) static int recur_fn (int i, int j, int depth)
+{
+   if (depth > 10)
+     return 1;
+
+   data[i + j]++;
+
+   if (depth & 3)
+     recur_fn (i, 1, depth + 1);
+   else
+     recur_fn (i, j & 1, depth + 1);
+
+   foo();
+
+   return i + j;
+}
+
+int caller (int v, int depth)
+{
+  recur_fn (1, v, depth);
+
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Clone of recur_fn/" 2 "cp" } } */
diff mbox series

Patch

From 74aef0cd2f40ff828a4b2abcbbdbbf4b1aea1fcf Mon Sep 17 00:00:00 2001
From: Feng Xue <fxue@os.amperecomputing.com>
Date: Tue, 21 Jan 2020 20:53:38 +0800
Subject: [PATCH] Generalized value pass-through for self-recusive function

---
 gcc/ipa-cp.c                       | 195 ++++++++++++++++++-----------
 gcc/testsuite/g++.dg/ipa/pr93203.C |  95 ++++++++++++++
 gcc/testsuite/gcc.dg/ipa/ipcp-1.c  |   2 +-
 3 files changed, 216 insertions(+), 76 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ipa/pr93203.C

diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 17da1d8e8a7..64d23a34292 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -1850,7 +1850,7 @@  ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
 	  {
 	    ipcp_value_source<valtype> *s;
 	    for (s = val->sources; s; s = s->next)
-	      if (s->cs == cs)
+	      if (s->cs == cs && s->val == src_val)
 		break;
 	    if (s)
 	      return false;
@@ -4207,6 +4207,33 @@  get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
   return hot;
 }
 
+/* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
+   to let a non-self-recursive caller be the first element.  Thus, we can
+   simplify intersecting operations on values that arrive from all of these
+   callers, especially when there exists self-recursive call.  Return true if
+   this kind of adjustment is possible.  */
+
+static bool
+adjust_callers_for_value_intersection (vec<cgraph_edge *> callers,
+				       cgraph_node *node)
+{
+  for (unsigned i = 0; i < callers.length (); i++)
+    {
+      cgraph_edge *cs = callers[i];
+
+      if (cs->caller != node)
+	{
+	  if (i > 0)
+	    {
+	      callers[i] = callers[0];
+	      callers[0] = cs;
+	    }
+	  return true;
+	}
+    }
+  return false;
+}
+
 /* Return a vector of incoming edges that do bring value VAL to node DEST.  It
    is assumed their number is known and equal to CALLER_COUNT.  */
 
@@ -4230,6 +4257,9 @@  gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
 	}
     }
 
+  if (caller_count > 1)
+    adjust_callers_for_value_intersection (ret, dest);
+
   return ret;
 }
 
@@ -4241,7 +4271,6 @@  get_replacement_map (class ipa_node_params *info, tree value, int parm_num)
 {
   struct ipa_replace_map *replace_map;
 
-
   replace_map = ggc_alloc<ipa_replace_map> ();
   if (dump_file)
     {
@@ -4592,36 +4621,40 @@  create_specialized_node (struct cgraph_node *node,
 }
 
 /* Return true, if JFUNC, which describes a i-th parameter of call CS, is a
-   simple no-operation pass-through function to itself.  */
+   pass-through function to itself.  When SIMPLE is true, further check if
+   JFUNC is a simple no-operation pass-through.  */
 
 static bool
-self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i)
+self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
+			       bool simple = true)
 {
   enum availability availability;
   if (cs->caller == cs->callee->function_symbol (&availability)
       && availability > AVAIL_INTERPOSABLE
       && jfunc->type == IPA_JF_PASS_THROUGH
-      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR
+      && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
       && ipa_get_jf_pass_through_formal_id (jfunc) == i)
     return true;
   return false;
 }
 
 /* Return true, if JFUNC, which describes a part of an aggregate represented
-   or pointed to by the i-th parameter of call CS, is a simple no-operation
-   pass-through function to itself.  */
+   or pointed to by the i-th parameter of call CS, is a pass-through function
+   to itself.  When SIMPLE is true, further check if JFUNC is a simple
+   no-operation pass-through.  */
 
 static bool
 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
-				   int i)
+				   int i, bool simple = true)
 {
   enum availability availability;
   if (cs->caller == cs->callee->function_symbol (&availability)
       && availability > AVAIL_INTERPOSABLE
       && jfunc->jftype == IPA_JF_LOAD_AGG
       && jfunc->offset == jfunc->value.load_agg.offset
-      && jfunc->value.pass_through.operation == NOP_EXPR
-      && jfunc->value.pass_through.formal_id == i)
+      && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
+      && jfunc->value.pass_through.formal_id == i
+      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type))
     return true;
   return false;
 }
@@ -4653,9 +4686,6 @@  find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
 	  struct ipa_jump_func *jump_func;
 	  tree t;
 
-	  if (IPA_NODE_REF (cs->caller) && IPA_NODE_REF (cs->caller)->node_dead)
-	    continue;
-
 	  if (!IPA_EDGE_REF (cs)
 	      || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))
 	      || (i == 0
@@ -4665,10 +4695,30 @@  find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
 	      break;
 	    }
 	  jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
-	  if (self_recursive_pass_through_p (cs, jump_func, i))
-	    continue;
 
-	  t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func, type);
+	  /* Besides simple pass-through jump function, arithmetic jump
+	     function could also introduce argument-direct-pass-through for
+	     self-feeding recursive call.  For example,
+
+	        fn (int i)
+	        {
+	          fn (i & 1);
+	        }
+
+	     Given that i is 0, recursive propagation via (i & 1) also gets
+	     0.  */
+	  if (self_recursive_pass_through_p (cs, jump_func, i, false))
+	    {
+	      gcc_assert (newval);
+	      t = ipa_get_jf_arith_result (
+				ipa_get_jf_pass_through_operation (jump_func),
+				newval,
+				ipa_get_jf_pass_through_operand (jump_func),
+				type);
+	    }
+	  else
+	    t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func,
+				      type);
 	  if (!t
 	      || (newval
 		  && !values_equal_for_ipcp_p (t, newval))
@@ -4817,19 +4867,12 @@  intersect_with_plats (class ipcp_param_lattices *plats,
 	    break;
 	  if (aglat->offset - offset == item->offset)
 	    {
-	      gcc_checking_assert (item->value);
 	      if (aglat->is_single_const ())
 		{
 		  tree value = aglat->values->value;
 
 		  if (values_equal_for_ipcp_p (item->value, value))
 		    found = true;
-		  else if (item->value == error_mark_node)
-		    {
-		      /* Replace unknown place holder value with real one.  */
-		      item->value = value;
-		      found = true;
-		    }
 		}
 	      break;
 	    }
@@ -4898,12 +4941,6 @@  intersect_with_agg_replacements (struct cgraph_node *node, int index,
 	    {
 	      if (values_equal_for_ipcp_p (item->value, av->value))
 		found = true;
-	      else if (item->value == error_mark_node)
-		{
-		  /* Replace place holder value with real one.  */
-		  item->value = av->value;
-		  found = true;
-		}
 	      break;
 	    }
 	}
@@ -5008,31 +5045,16 @@  intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
 	for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
 	  {
 	    struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
-	    struct ipa_agg_value agg_value;
-
-	    if (self_recursive_agg_pass_through_p (cs, agg_item, index))
-	      {
-		/* For a self-recursive call, if aggregate jump function is a
-		   simple pass-through, the exact value that it stands for is
-		   not known at this point, which must comes from other call
-		   sites.  But we still need to add a place holder in value
-		   sets to indicate it, here we use error_mark_node to
-		   represent the special unknown value, which will be replaced
-		   with real one during later intersecting operations.  */
-		agg_value.value = error_mark_node;
-	      }
-	    else
+	    tree value = ipa_agg_value_from_node (caller_info, cs->caller,
+						  agg_item);
+	    if (value)
 	      {
-		tree value = ipa_agg_value_from_node (caller_info, cs->caller,
-						      agg_item);
-		if (!value)
-		  continue;
+		struct ipa_agg_value agg_value;
 
 		agg_value.value = value;
+		agg_value.offset = agg_item->offset;
+		inter.safe_push (agg_value);
 	      }
-
-	    agg_value.offset = agg_item->offset;
-	    inter.safe_push (agg_value);
 	  }
       else
 	FOR_EACH_VEC_ELT (inter, k, item)
@@ -5053,25 +5075,32 @@  intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
 		  {
 		    tree value;
 
-		    if (self_recursive_agg_pass_through_p (cs, ti, index))
-		      {
-			/* A simple aggregate pass-through in self-recursive
-			   call should lead to same value.  */
-			found = true;
-		      }
-		    else if ((value = ipa_agg_value_from_node (caller_info,
-							     cs->caller, ti)))
-		      {
-			if (values_equal_for_ipcp_p (item->value, value))
-			  found = true;
-			else if (item->value == error_mark_node)
-			  {
-			    /* Replace unknown place holder value with real
-			       one.  */
-			    item->value = value;
-			    found = true;
-			  }
-		      }
+		    /* Besides simple pass-through aggregate jump function,
+		       arithmetic aggregate jump function could also bring
+		       same aggregate value as parameter passed-in for
+		       self-feeding recursive call.  For example,
+
+		         fn (int *i)
+		           {
+		             int j = *i & 1;
+		             fn (&j);
+		           }
+
+		       Given that *i is 0, recursive propagation via (*i & 1)
+		       also gets 0.  */
+		    if (self_recursive_agg_pass_through_p (cs, ti, index,
+							   false))
+		      value = ipa_get_jf_arith_result (
+					ti->value.pass_through.operation,
+					item->value,
+					ti->value.pass_through.operand,
+					ti->type);
+		    else
+		      value = ipa_agg_value_from_node (caller_info,
+						       cs->caller, ti);
+
+		    if (value && values_equal_for_ipcp_p (item->value, value))
+		      found = true;
 		    break;
 		  }
 		l++;
@@ -5147,9 +5176,6 @@  find_aggregate_values_for_callers_subset (struct cgraph_node *node,
 	  if (!item->value)
 	    continue;
 
-	  /* All values must be real values, not unknown place holders.  */
-	  gcc_assert (item->value != error_mark_node);
-
 	  v = ggc_alloc<ipa_agg_replacement_value> ();
 	  v->index = i;
 	  v->offset = item->offset;
@@ -5545,13 +5571,33 @@  decide_whether_version_node (struct cgraph_node *node)
   if (info->do_clone_for_all_contexts)
     {
       struct cgraph_node *clone;
-      vec<cgraph_edge *> callers;
+      vec<cgraph_edge *> callers = node->collect_callers ();
+
+      for (int i = callers.length () - 1; i >= 0; i--)
+	{
+	  cgraph_edge *cs = callers[i];
+	  class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+
+	  if (caller_info && caller_info->node_dead)
+	    callers.unordered_remove (i);
+	}
+
+      if (!adjust_callers_for_value_intersection (callers, node))
+	{
+	  /* If node is not called by anyone, or all its caller edges are
+	     self-recursive, the node is not really be in use, no need to
+	     do cloning.  */
+	  callers.release ();
+	  known_csts.release ();
+	  known_contexts.release ();
+	  info->do_clone_for_all_contexts = false;
+	  return ret;
+	}
 
       if (dump_file)
 	fprintf (dump_file, " - Creating a specialized node of %s "
 		 "for all known contexts.\n", node->dump_name ());
 
-      callers = node->collect_callers ();
       find_more_scalar_values_for_callers_subset (node, known_csts, callers);
       find_more_contexts_for_caller_subset (node, &known_contexts, callers);
       ipa_agg_replacement_value *aggvals
@@ -5564,7 +5610,6 @@  decide_whether_version_node (struct cgraph_node *node)
 	}
       clone = create_specialized_node (node, known_csts, known_contexts,
 				       aggvals, callers);
-      info = IPA_NODE_REF (node);
       info->do_clone_for_all_contexts = false;
       IPA_NODE_REF (clone)->is_all_contexts_clone = true;
       ret = true;
diff --git a/gcc/testsuite/g++.dg/ipa/pr93203.C b/gcc/testsuite/g++.dg/ipa/pr93203.C
new file mode 100644
index 00000000000..b4cd69001b5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/pr93203.C
@@ -0,0 +1,95 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -w -std=gnu++11" } */
+
+class a {
+public:
+  a(char *);
+};
+class ad {
+public:
+  ad(a *);
+};
+class b {};
+using ah = class ak {
+  using al = char;
+
+public:
+  ak(b) : ak(0) {}
+  ak an() { return ap & 1; }
+  al ap;
+  ak(al af) : ap(af) {}
+};
+struct at {
+  ah au;
+  int av;
+  char aw;
+  char ax;
+};
+class az {
+public:
+  struct ba {
+    void bb(ak am) {
+      ak bc = am.an();
+      bb(bc);
+    }
+  };
+  void bd(ak am) { be.bb(am); }
+  ba be;
+};
+class bg {
+public:
+  int bh;
+  at bi;
+};
+int bj;
+int *bk;
+int c;
+class bl {
+  bool bm();
+  bg bn;
+  az bo;
+  int e;
+  int bq;
+};
+class bs {
+public:
+  bs(int *, ah *, char *, char *, int);
+};
+template < typename bt > class bu : bs {
+  using bv = typename bt::bv;
+
+public:
+  template < typename... bx >
+  bu(a *by, int *bz, at body, bx...)
+      : bs(bz, &body.au, &body.aw, &body.ax, body.av), ca(bx()...), cb(by),
+        cc(by), cd(by), ce(by) {}
+  void cf() {
+    auto cg = ch();
+    auto ci = *cj();
+    ca.ck(this, cg, &ci);
+  }
+  bt ca;
+  ad cb;
+  ad cc;
+  ad cd;
+  ad ce;
+  bv *cj();
+  bv ch();
+};
+class cl {
+public:
+  using bv = struct {};
+  cl(az *, int, int, int, int, a *, int, int **);
+  void ck(bs *, bv, bv *) {
+    b d;
+    ak ci(d);
+    bo.bd(ci);
+  }
+  az bo;
+};
+bool bl::bm() {
+  a by("");
+  bu< cl > cn(&by, &bj, bn.bi, &bo, c, bn.bh, e, 0, &by, bq, &bk);
+  cn.cf();
+}
+
diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-1.c b/gcc/testsuite/gcc.dg/ipa/ipcp-1.c
index 952694d302b..baa9c97ffb0 100644
--- a/gcc/testsuite/gcc.dg/ipa/ipcp-1.c
+++ b/gcc/testsuite/gcc.dg/ipa/ipcp-1.c
@@ -45,7 +45,7 @@  main (int argc, char *argv[])
 }
 
 
-/* { dg-final { scan-ipa-dump "Creating a specialized node of f.*for all known contexts" "cp" } } */
+/* { dg-final { scan-ipa-dump "Creating a specialized node of f" "cp" } } */
 /* { dg-final { scan-ipa-dump "replacing param .0 a with const 7" "cp"  } } */
 
 
-- 
2.17.1