diff mbox series

Replace evrp use in loop versioning with ranger.

Message ID 20210724141937.2325339-1-aldyh@redhat.com
State New
Headers show
Series Replace evrp use in loop versioning with ranger. | expand

Commit Message

Aldy Hernandez July 24, 2021, 2:19 p.m. UTC
This patch replaces the evrp_range_analyzer in the loop versioning code
with an on-demand ranger.

Everything was pretty straightforward, except that range_of_expr requires
a gimple statement as context to provide context aware ranges.  I didn't see
a convient place where the statement was saved, so I made a vector indexed
by SSA names.  As an alternative, I tried to use the loop's first statement,
but that proved to be insufficient.

I am not familiar with loop versioning, but if the DOM walk was only
necessary for the calls to record_ranges_from_stmt, this too could be
removed as the ranger will work without it.

Tested on x86-64 Linux.

OK?

gcc/ChangeLog:

	* gimple-loop-versioning.cc (lv_dom_walker::lv_dom_walker): Remove
	use of m_range_analyzer.
	(loop_versioning::lv_dom_walker::before_dom_children): Same.
	(loop_versioning::lv_dom_walker::after_dom_children): Remove.
	(loop_versioning::loop_versioning): Allocate space for
	m_ssa_context.
	(loop_versioning::version_for_unity): Set m_ssa_context.
	(loop_versioning::prune_loop_conditions): Replace vr_values use
	with range_query interface.
	(pass_loop_versioning::execute): Use ranger.
---
 gcc/gimple-loop-versioning.cc | 49 ++++++++++++++---------------------
 1 file changed, 20 insertions(+), 29 deletions(-)

Comments

Richard Sandiford July 26, 2021, 2:18 p.m. UTC | #1
Aldy Hernandez <aldyh@redhat.com> writes:
> This patch replaces the evrp_range_analyzer in the loop versioning code
> with an on-demand ranger.
>
> Everything was pretty straightforward, except that range_of_expr requires
> a gimple statement as context to provide context aware ranges.  I didn't see
> a convient place where the statement was saved, so I made a vector indexed
> by SSA names.  As an alternative, I tried to use the loop's first statement,
> but that proved to be insufficient.

The mapping is one-to-many though: there can be multiple statements
for each SSA name.  Maybe that doesn't matter in this context and
any of the statements can act as a representative.

I'm surprised that the loop's first statement didn't work though,
since the SSA name is supposedly known to be loop-invariant.  What went
wrong when you tried that?

> I am not familiar with loop versioning, but if the DOM walk was only
> necessary for the calls to record_ranges_from_stmt, this too could be
> removed as the ranger will work without it.

Yeah, that was the only reason.  If the information is available at
version_for_unity (I guess it is) then we should just avoid recording
the versioning there if so.

How expensive is the check?  If the result is worth caching, perhaps
we should have two bitmaps: the existing one, and one that records
whether we've checked a particular SSA name.

If the check is relatively cheap then that won't be worth it though.

Thanks,
Richard

>
> Tested on x86-64 Linux.
>
> OK?
>
> gcc/ChangeLog:
>
> 	* gimple-loop-versioning.cc (lv_dom_walker::lv_dom_walker): Remove
> 	use of m_range_analyzer.
> 	(loop_versioning::lv_dom_walker::before_dom_children): Same.
> 	(loop_versioning::lv_dom_walker::after_dom_children): Remove.
> 	(loop_versioning::loop_versioning): Allocate space for
> 	m_ssa_context.
> 	(loop_versioning::version_for_unity): Set m_ssa_context.
> 	(loop_versioning::prune_loop_conditions): Replace vr_values use
> 	with range_query interface.
> 	(pass_loop_versioning::execute): Use ranger.
> ---
>  gcc/gimple-loop-versioning.cc | 49 ++++++++++++++---------------------
>  1 file changed, 20 insertions(+), 29 deletions(-)
>
> diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
> index 4b70c5a4aab..987994d4995 100644
> --- a/gcc/gimple-loop-versioning.cc
> +++ b/gcc/gimple-loop-versioning.cc
> @@ -30,19 +30,17 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-ssa-loop.h"
>  #include "ssa.h"
>  #include "tree-scalar-evolution.h"
> -#include "tree-chrec.h"
>  #include "tree-ssa-loop-ivopts.h"
>  #include "fold-const.h"
>  #include "tree-ssa-propagate.h"
>  #include "tree-inline.h"
>  #include "domwalk.h"
> -#include "alloc-pool.h"
> -#include "vr-values.h"
> -#include "gimple-ssa-evrp-analyze.h"
>  #include "tree-vectorizer.h"
>  #include "omp-general.h"
>  #include "predict.h"
>  #include "tree-into-ssa.h"
> +#include "gimple-range.h"
> +#include "tree-cfg.h"
>  
>  namespace {
>  
> @@ -261,14 +259,10 @@ private:
>      lv_dom_walker (loop_versioning &);
>  
>      edge before_dom_children (basic_block) FINAL OVERRIDE;
> -    void after_dom_children (basic_block) FINAL OVERRIDE;
>  
>    private:
>      /* The parent pass.  */
>      loop_versioning &m_lv;
> -
> -    /* Used to build context-dependent range information.  */
> -    evrp_range_analyzer m_range_analyzer;
>    };
>  
>    /* Used to simplify statements based on conditions that are established
> @@ -308,7 +302,7 @@ private:
>    bool analyze_block (basic_block);
>    bool analyze_blocks ();
>  
> -  void prune_loop_conditions (class loop *, vr_values *);
> +  void prune_loop_conditions (class loop *);
>    bool prune_conditions ();
>  
>    void merge_loop_info (class loop *, class loop *);
> @@ -355,6 +349,9 @@ private:
>  
>    /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order.  */
>    auto_vec <address_info *, 32> m_address_list;
> +
> +  /* Gimple context for each SSA in loop_info::unity_names.  */
> +  auto_vec <gimple *> m_ssa_context;
>  };
>  
>  /* If EXPR is an SSA name and not a default definition, return the
> @@ -500,7 +497,7 @@ loop_info::worth_versioning_p () const
>  }
>  
>  loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
> -  : dom_walker (CDI_DOMINATORS), m_lv (lv), m_range_analyzer (false)
> +  : dom_walker (CDI_DOMINATORS), m_lv (lv)
>  {
>  }
>  
> @@ -509,26 +506,12 @@ loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
>  edge
>  loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
>  {
> -  m_range_analyzer.enter (bb);
> -
>    if (bb == bb->loop_father->header)
> -    m_lv.prune_loop_conditions (bb->loop_father, &m_range_analyzer);
> -
> -  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
> -       gsi_next (&si))
> -    m_range_analyzer.record_ranges_from_stmt (gsi_stmt (si), false);
> +    m_lv.prune_loop_conditions (bb->loop_father);
>  
>    return NULL;
>  }
>  
> -/* Process BB after processing the blocks it dominates.  */
> -
> -void
> -loop_versioning::lv_dom_walker::after_dom_children (basic_block bb)
> -{
> -  m_range_analyzer.leave (bb);
> -}
> -
>  /* Decide whether to replace VAL with a new value in a versioned loop.
>     Return the new value if so, otherwise return null.  */
>  
> @@ -549,6 +532,7 @@ loop_versioning::loop_versioning (function *fn)
>      m_num_conditions (0),
>      m_address_table (31)
>  {
> +  m_ssa_context.safe_grow (num_ssa_names);
>    bitmap_obstack_initialize (&m_bitmap_obstack);
>    gcc_obstack_init (&m_obstack);
>  
> @@ -644,6 +628,8 @@ loop_versioning::version_for_unity (gimple *stmt, tree name)
>        if (loop_depth (li.outermost) < loop_depth (outermost))
>  	li.outermost = outermost;
>  
> +      m_ssa_context[SSA_NAME_VERSION(name)] = stmt;
> +
>        if (dump_enabled_p ())
>  	{
>  	  dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop"
> @@ -1483,18 +1469,20 @@ loop_versioning::analyze_blocks ()
>     LOOP.  */
>  
>  void
> -loop_versioning::prune_loop_conditions (class loop *loop, vr_values *vrs)
> +loop_versioning::prune_loop_conditions (class loop *loop)
>  {
>    loop_info &li = get_loop_info (loop);
>  
>    int to_remove = -1;
>    bitmap_iterator bi;
>    unsigned int i;
> +  int_range_max r;
>    EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
>      {
>        tree name = ssa_name (i);
> -      const value_range_equiv *vr = vrs->get_value_range (name);
> -      if (vr && !vr->may_contain_p (build_one_cst (TREE_TYPE (name))))
> +      gimple *stmt = m_ssa_context[SSA_NAME_VERSION (name)];
> +      if (get_range_query (cfun)->range_of_expr (r, name, stmt)
> +	  && !r.contains_p (build_one_cst (TREE_TYPE (name))))
>  	{
>  	  if (dump_enabled_p ())
>  	    dump_printf_loc (MSG_NOTE, find_loop_location (loop),
> @@ -1810,7 +1798,10 @@ pass_loop_versioning::execute (function *fn)
>    if (number_of_loops (fn) <= 1)
>      return 0;
>  
> -  return loop_versioning (fn).run ();
> +  enable_ranger (fn);
> +  unsigned int ret = loop_versioning (fn).run ();
> +  disable_ranger (fn);
> +  return ret;
>  }
>  
>  } // anon namespace
Aldy Hernandez July 26, 2021, 3:16 p.m. UTC | #2
On Mon, Jul 26, 2021 at 4:18 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Aldy Hernandez <aldyh@redhat.com> writes:
> > This patch replaces the evrp_range_analyzer in the loop versioning code
> > with an on-demand ranger.
> >
> > Everything was pretty straightforward, except that range_of_expr requires
> > a gimple statement as context to provide context aware ranges.  I didn't see
> > a convient place where the statement was saved, so I made a vector indexed
> > by SSA names.  As an alternative, I tried to use the loop's first statement,
> > but that proved to be insufficient.
>
> The mapping is one-to-many though: there can be multiple statements
> for each SSA name.  Maybe that doesn't matter in this context and
> any of the statements can act as a representative.
>
> I'm surprised that the loop's first statement didn't work though,
> since the SSA name is supposedly known to be loop-invariant.  What went
> wrong when you tried that?

I was looking at the first statement of loop_info->block_list and one
of the dg.exp=loop-versioning* tests failed.  Perhaps I should have
used the loop itself, as in the attached patch.  With this patch all
of the loop-versioning tests pass.

>
> > I am not familiar with loop versioning, but if the DOM walk was only
> > necessary for the calls to record_ranges_from_stmt, this too could be
> > removed as the ranger will work without it.
>
> Yeah, that was the only reason.  If the information is available at
> version_for_unity (I guess it is) then we should just avoid recording
> the versioning there if so.
>
> How expensive is the check?  If the result is worth caching, perhaps
> we should have two bitmaps: the existing one, and one that records
> whether we've checked a particular SSA name.
>
> If the check is relatively cheap then that won't be worth it though.

If you're asking about the range_of_expr check, that's all cached, so
it should be pretty cheap.  Besides, we're no longer calculating
ranges for each statement in the IL, as we were doing in lv_dom_walker
with evrp's record_ranges_from_stmt.  Only statements of interest are
queried.

How about this patch, pending tests?

Aldy
Aldy Hernandez July 26, 2021, 4:08 p.m. UTC | #3
> > How expensive is the check?  If the result is worth caching, perhaps
> > we should have two bitmaps: the existing one, and one that records
> > whether we've checked a particular SSA name.
> >
> > If the check is relatively cheap then that won't be worth it though.
>
> If you're asking about the range_of_expr check, that's all cached, so
> it should be pretty cheap.  Besides, we're no longer calculating
> ranges for each statement in the IL, as we were doing in lv_dom_walker
> with evrp's record_ranges_from_stmt.  Only statements of interest are
> queried.

You know, we already have infrastructure to test performance of small
changes with callgrind.  If you want I can compare numbers for:

1. mainline right now
2. my proposed patch
3. my proposed patch without a dom walk

Aldy
Richard Sandiford July 26, 2021, 5:28 p.m. UTC | #4
Aldy Hernandez <aldyh@redhat.com> writes:
> On Mon, Jul 26, 2021 at 4:18 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Aldy Hernandez <aldyh@redhat.com> writes:
>> > This patch replaces the evrp_range_analyzer in the loop versioning code
>> > with an on-demand ranger.
>> >
>> > Everything was pretty straightforward, except that range_of_expr requires
>> > a gimple statement as context to provide context aware ranges.  I didn't see
>> > a convient place where the statement was saved, so I made a vector indexed
>> > by SSA names.  As an alternative, I tried to use the loop's first statement,
>> > but that proved to be insufficient.
>>
>> The mapping is one-to-many though: there can be multiple statements
>> for each SSA name.  Maybe that doesn't matter in this context and
>> any of the statements can act as a representative.
>>
>> I'm surprised that the loop's first statement didn't work though,
>> since the SSA name is supposedly known to be loop-invariant.  What went
>> wrong when you tried that?
>
> I was looking at the first statement of loop_info->block_list and one
> of the dg.exp=loop-versioning* tests failed.  Perhaps I should have
> used the loop itself, as in the attached patch.  With this patch all
> of the loop-versioning tests pass.
>
>>
>> > I am not familiar with loop versioning, but if the DOM walk was only
>> > necessary for the calls to record_ranges_from_stmt, this too could be
>> > removed as the ranger will work without it.
>>
>> Yeah, that was the only reason.  If the information is available at
>> version_for_unity (I guess it is) then we should just avoid recording
>> the versioning there if so.
>>
>> How expensive is the check?  If the result is worth caching, perhaps
>> we should have two bitmaps: the existing one, and one that records
>> whether we've checked a particular SSA name.
>>
>> If the check is relatively cheap then that won't be worth it though.
>
> If you're asking about the range_of_expr check, that's all cached, so
> it should be pretty cheap.  Besides, we're no longer calculating
> ranges for each statement in the IL, as we were doing in lv_dom_walker
> with evrp's record_ranges_from_stmt.  Only statements of interest are
> queried.

Sounds good.  If the results are already cached then another level
of caching (via the second bitmap I mentioned above) would obviously
be a waste of time.

> How about this patch, pending tests?

OK, thanks, as a strict improvement over the status quo.  But it'd be
even better without the dom walk :-)

Richard

>
> Aldy
>
> From 8a350003d950869499d729921008abdb491d3a5e Mon Sep 17 00:00:00 2001
> From: Aldy Hernandez <aldyh@redhat.com>
> Date: Sat, 24 Jul 2021 12:29:28 +0200
> Subject: [PATCH] Replace evrp use in loop versioning with ranger.
>
> This patch replaces the evrp_range_analyzer in the loop versioning code
> with an on-demand ranger.
>
> Tested on x86-64 Linux.
>
> gcc/ChangeLog:
>
> 	* gimple-loop-versioning.cc (lv_dom_walker::lv_dom_walker): Remove
> 	use of m_range_analyzer.
> 	(loop_versioning::lv_dom_walker::before_dom_children): Same.
> 	(loop_versioning::lv_dom_walker::after_dom_children): Remove.
> 	(loop_versioning::prune_loop_conditions): Replace vr_values use
> 	with range_query interface.
> 	(pass_loop_versioning::execute): Use ranger.
> ---
>  gcc/gimple-loop-versioning.cc | 44 ++++++++++++-----------------------
>  1 file changed, 15 insertions(+), 29 deletions(-)
>
> diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
> index 4b70c5a4aab..46c3a508c8d 100644
> --- a/gcc/gimple-loop-versioning.cc
> +++ b/gcc/gimple-loop-versioning.cc
> @@ -30,19 +30,17 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-ssa-loop.h"
>  #include "ssa.h"
>  #include "tree-scalar-evolution.h"
> -#include "tree-chrec.h"
>  #include "tree-ssa-loop-ivopts.h"
>  #include "fold-const.h"
>  #include "tree-ssa-propagate.h"
>  #include "tree-inline.h"
>  #include "domwalk.h"
> -#include "alloc-pool.h"
> -#include "vr-values.h"
> -#include "gimple-ssa-evrp-analyze.h"
>  #include "tree-vectorizer.h"
>  #include "omp-general.h"
>  #include "predict.h"
>  #include "tree-into-ssa.h"
> +#include "gimple-range.h"
> +#include "tree-cfg.h"
>  
>  namespace {
>  
> @@ -261,14 +259,10 @@ private:
>      lv_dom_walker (loop_versioning &);
>  
>      edge before_dom_children (basic_block) FINAL OVERRIDE;
> -    void after_dom_children (basic_block) FINAL OVERRIDE;
>  
>    private:
>      /* The parent pass.  */
>      loop_versioning &m_lv;
> -
> -    /* Used to build context-dependent range information.  */
> -    evrp_range_analyzer m_range_analyzer;
>    };
>  
>    /* Used to simplify statements based on conditions that are established
> @@ -308,7 +302,7 @@ private:
>    bool analyze_block (basic_block);
>    bool analyze_blocks ();
>  
> -  void prune_loop_conditions (class loop *, vr_values *);
> +  void prune_loop_conditions (class loop *);
>    bool prune_conditions ();
>  
>    void merge_loop_info (class loop *, class loop *);
> @@ -500,7 +494,7 @@ loop_info::worth_versioning_p () const
>  }
>  
>  loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
> -  : dom_walker (CDI_DOMINATORS), m_lv (lv), m_range_analyzer (false)
> +  : dom_walker (CDI_DOMINATORS), m_lv (lv)
>  {
>  }
>  
> @@ -509,26 +503,12 @@ loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
>  edge
>  loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
>  {
> -  m_range_analyzer.enter (bb);
> -
>    if (bb == bb->loop_father->header)
> -    m_lv.prune_loop_conditions (bb->loop_father, &m_range_analyzer);
> -
> -  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
> -       gsi_next (&si))
> -    m_range_analyzer.record_ranges_from_stmt (gsi_stmt (si), false);
> +    m_lv.prune_loop_conditions (bb->loop_father);
>  
>    return NULL;
>  }
>  
> -/* Process BB after processing the blocks it dominates.  */
> -
> -void
> -loop_versioning::lv_dom_walker::after_dom_children (basic_block bb)
> -{
> -  m_range_analyzer.leave (bb);
> -}
> -
>  /* Decide whether to replace VAL with a new value in a versioned loop.
>     Return the new value if so, otherwise return null.  */
>  
> @@ -1483,18 +1463,21 @@ loop_versioning::analyze_blocks ()
>     LOOP.  */
>  
>  void
> -loop_versioning::prune_loop_conditions (class loop *loop, vr_values *vrs)
> +loop_versioning::prune_loop_conditions (class loop *loop)
>  {
>    loop_info &li = get_loop_info (loop);
>  
>    int to_remove = -1;
>    bitmap_iterator bi;
>    unsigned int i;
> +  int_range_max r;
>    EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
>      {
>        tree name = ssa_name (i);
> -      const value_range_equiv *vr = vrs->get_value_range (name);
> -      if (vr && !vr->may_contain_p (build_one_cst (TREE_TYPE (name))))
> +      gimple *stmt = first_stmt (loop->header);
> +
> +      if (get_range_query (cfun)->range_of_expr (r, name, stmt)
> +	  && !r.contains_p (build_one_cst (TREE_TYPE (name))))
>  	{
>  	  if (dump_enabled_p ())
>  	    dump_printf_loc (MSG_NOTE, find_loop_location (loop),
> @@ -1810,7 +1793,10 @@ pass_loop_versioning::execute (function *fn)
>    if (number_of_loops (fn) <= 1)
>      return 0;
>  
> -  return loop_versioning (fn).run ();
> +  enable_ranger (fn);
> +  unsigned int ret = loop_versioning (fn).run ();
> +  disable_ranger (fn);
> +  return ret;
>  }
>  
>  } // anon namespace
Aldy Hernandez July 27, 2021, 9:52 a.m. UTC | #5
On Mon, Jul 26, 2021 at 7:28 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Aldy Hernandez <aldyh@redhat.com> writes:
> > On Mon, Jul 26, 2021 at 4:18 PM Richard Sandiford
> > <richard.sandiford@arm.com> wrote:
> >>
> >> Aldy Hernandez <aldyh@redhat.com> writes:
> >> > This patch replaces the evrp_range_analyzer in the loop versioning code
> >> > with an on-demand ranger.
> >> >
> >> > Everything was pretty straightforward, except that range_of_expr requires
> >> > a gimple statement as context to provide context aware ranges.  I didn't see
> >> > a convient place where the statement was saved, so I made a vector indexed
> >> > by SSA names.  As an alternative, I tried to use the loop's first statement,
> >> > but that proved to be insufficient.
> >>
> >> The mapping is one-to-many though: there can be multiple statements
> >> for each SSA name.  Maybe that doesn't matter in this context and
> >> any of the statements can act as a representative.
> >>
> >> I'm surprised that the loop's first statement didn't work though,
> >> since the SSA name is supposedly known to be loop-invariant.  What went
> >> wrong when you tried that?
> >
> > I was looking at the first statement of loop_info->block_list and one
> > of the dg.exp=loop-versioning* tests failed.  Perhaps I should have
> > used the loop itself, as in the attached patch.  With this patch all
> > of the loop-versioning tests pass.
> >
> >>
> >> > I am not familiar with loop versioning, but if the DOM walk was only
> >> > necessary for the calls to record_ranges_from_stmt, this too could be
> >> > removed as the ranger will work without it.
> >>
> >> Yeah, that was the only reason.  If the information is available at
> >> version_for_unity (I guess it is) then we should just avoid recording
> >> the versioning there if so.
> >>
> >> How expensive is the check?  If the result is worth caching, perhaps
> >> we should have two bitmaps: the existing one, and one that records
> >> whether we've checked a particular SSA name.
> >>
> >> If the check is relatively cheap then that won't be worth it though.
> >
> > If you're asking about the range_of_expr check, that's all cached, so
> > it should be pretty cheap.  Besides, we're no longer calculating
> > ranges for each statement in the IL, as we were doing in lv_dom_walker
> > with evrp's record_ranges_from_stmt.  Only statements of interest are
> > queried.
>
> Sounds good.  If the results are already cached then another level
> of caching (via the second bitmap I mentioned above) would obviously
> be a waste of time.

My callgrind harness for performance testing wasn't able to pick up
enough samples to measure the time spent in
pass_loop_versioning::execute.  I've seen this happen before with
passes that run too fast.  I'm afraid I don't have enough cycles to
continue working on this.

>
> > How about this patch, pending tests?
>
> OK, thanks, as a strict improvement over the status quo.  But it'd be
> even better without the dom walk :-)

I've removed the DOM walk, and re-tested.

OK to push?

Aldy
Richard Sandiford July 30, 2021, 8:39 a.m. UTC | #6
Aldy Hernandez <aldyh@redhat.com> writes:
> On Mon, Jul 26, 2021 at 7:28 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Aldy Hernandez <aldyh@redhat.com> writes:
>> > On Mon, Jul 26, 2021 at 4:18 PM Richard Sandiford
>> > <richard.sandiford@arm.com> wrote:
>> >>
>> >> Aldy Hernandez <aldyh@redhat.com> writes:
>> >> > This patch replaces the evrp_range_analyzer in the loop versioning code
>> >> > with an on-demand ranger.
>> >> >
>> >> > Everything was pretty straightforward, except that range_of_expr requires
>> >> > a gimple statement as context to provide context aware ranges.  I didn't see
>> >> > a convient place where the statement was saved, so I made a vector indexed
>> >> > by SSA names.  As an alternative, I tried to use the loop's first statement,
>> >> > but that proved to be insufficient.
>> >>
>> >> The mapping is one-to-many though: there can be multiple statements
>> >> for each SSA name.  Maybe that doesn't matter in this context and
>> >> any of the statements can act as a representative.
>> >>
>> >> I'm surprised that the loop's first statement didn't work though,
>> >> since the SSA name is supposedly known to be loop-invariant.  What went
>> >> wrong when you tried that?
>> >
>> > I was looking at the first statement of loop_info->block_list and one
>> > of the dg.exp=loop-versioning* tests failed.  Perhaps I should have
>> > used the loop itself, as in the attached patch.  With this patch all
>> > of the loop-versioning tests pass.
>> >
>> >>
>> >> > I am not familiar with loop versioning, but if the DOM walk was only
>> >> > necessary for the calls to record_ranges_from_stmt, this too could be
>> >> > removed as the ranger will work without it.
>> >>
>> >> Yeah, that was the only reason.  If the information is available at
>> >> version_for_unity (I guess it is) then we should just avoid recording
>> >> the versioning there if so.
>> >>
>> >> How expensive is the check?  If the result is worth caching, perhaps
>> >> we should have two bitmaps: the existing one, and one that records
>> >> whether we've checked a particular SSA name.
>> >>
>> >> If the check is relatively cheap then that won't be worth it though.
>> >
>> > If you're asking about the range_of_expr check, that's all cached, so
>> > it should be pretty cheap.  Besides, we're no longer calculating
>> > ranges for each statement in the IL, as we were doing in lv_dom_walker
>> > with evrp's record_ranges_from_stmt.  Only statements of interest are
>> > queried.
>>
>> Sounds good.  If the results are already cached then another level
>> of caching (via the second bitmap I mentioned above) would obviously
>> be a waste of time.
>
> My callgrind harness for performance testing wasn't able to pick up
> enough samples to measure the time spent in
> pass_loop_versioning::execute.  I've seen this happen before with
> passes that run too fast.  I'm afraid I don't have enough cycles to
> continue working on this.

Yeah, any testing of this was above and beyond IMO.  Hearing that the
range query does its own caching was enough for me. :-)

>> > How about this patch, pending tests?
>>
>> OK, thanks, as a strict improvement over the status quo.  But it'd be
>> even better without the dom walk :-)
>
> I've removed the DOM walk, and re-tested.
>
> OK to push?

Sorry for asking for another iteration, but…

> Aldy
>
> From 9b1cba95377e7b26b4f0495b1b5998d2f7f33a14 Mon Sep 17 00:00:00 2001
> From: Aldy Hernandez <aldyh@redhat.com>
> Date: Sat, 24 Jul 2021 12:29:28 +0200
> Subject: [PATCH] Replace evrp use in loop versioning with ranger.
>
> This patch replaces the evrp_range_analyzer in the loop versioning code
> with a ranger.
>
> Tested on x86-64 Linux.
>
> gcc/ChangeLog:
>
> 	* gimple-loop-versioning.cc (lv_dom_walker::lv_dom_walker): Remove.
> 	(loop_versioning::lv_dom_walker::before_dom_children): Remove.
> 	(loop_versioning::lv_dom_walker::after_dom_children): Remove.
> 	(loop_versioning::prune_loop_conditions): Replace vr_values use
> 	with range_query interface.
> 	(loop_versioning::prune_conditions): Replace dom walk with
> 	straight iteration.
> 	(pass_loop_versioning::execute): Use ranger.
> ---
>  gcc/gimple-loop-versioning.cc | 78 ++++++++---------------------------
>  1 file changed, 18 insertions(+), 60 deletions(-)
>
> diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
> index 4b70c5a4aab..52eb6429171 100644
> --- a/gcc/gimple-loop-versioning.cc
> +++ b/gcc/gimple-loop-versioning.cc
> @@ -30,19 +30,17 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-ssa-loop.h"
>  #include "ssa.h"
>  #include "tree-scalar-evolution.h"
> -#include "tree-chrec.h"
>  #include "tree-ssa-loop-ivopts.h"
>  #include "fold-const.h"
>  #include "tree-ssa-propagate.h"
>  #include "tree-inline.h"
>  #include "domwalk.h"
> -#include "alloc-pool.h"
> -#include "vr-values.h"
> -#include "gimple-ssa-evrp-analyze.h"
>  #include "tree-vectorizer.h"
>  #include "omp-general.h"
>  #include "predict.h"
>  #include "tree-into-ssa.h"
> +#include "gimple-range.h"
> +#include "tree-cfg.h"
>  
>  namespace {
>  
> @@ -253,24 +251,6 @@ public:
>    unsigned int run ();
>  
>  private:
> -  /* Used to walk the dominator tree to find loop versioning conditions
> -     that are always false.  */
> -  class lv_dom_walker : public dom_walker
> -  {
> -  public:
> -    lv_dom_walker (loop_versioning &);
> -
> -    edge before_dom_children (basic_block) FINAL OVERRIDE;
> -    void after_dom_children (basic_block) FINAL OVERRIDE;
> -
> -  private:
> -    /* The parent pass.  */
> -    loop_versioning &m_lv;
> -
> -    /* Used to build context-dependent range information.  */
> -    evrp_range_analyzer m_range_analyzer;
> -  };
> -
>    /* Used to simplify statements based on conditions that are established
>       by the version checks.  */
>    class name_prop : public substitute_and_fold_engine
> @@ -308,7 +288,7 @@ private:
>    bool analyze_block (basic_block);
>    bool analyze_blocks ();
>  
> -  void prune_loop_conditions (class loop *, vr_values *);
> +  void prune_loop_conditions (class loop *);
>    bool prune_conditions ();
>  
>    void merge_loop_info (class loop *, class loop *);
> @@ -499,36 +479,6 @@ loop_info::worth_versioning_p () const
>  	  && (!bitmap_empty_p (&unity_names) || subloops_benefit_p));
>  }
>  
> -loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
> -  : dom_walker (CDI_DOMINATORS), m_lv (lv), m_range_analyzer (false)
> -{
> -}
> -
> -/* Process BB before processing the blocks it dominates.  */
> -
> -edge
> -loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
> -{
> -  m_range_analyzer.enter (bb);
> -
> -  if (bb == bb->loop_father->header)
> -    m_lv.prune_loop_conditions (bb->loop_father, &m_range_analyzer);
> -
> -  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
> -       gsi_next (&si))
> -    m_range_analyzer.record_ranges_from_stmt (gsi_stmt (si), false);
> -
> -  return NULL;
> -}
> -
> -/* Process BB after processing the blocks it dominates.  */
> -
> -void
> -loop_versioning::lv_dom_walker::after_dom_children (basic_block bb)
> -{
> -  m_range_analyzer.leave (bb);
> -}
> -
>  /* Decide whether to replace VAL with a new value in a versioned loop.
>     Return the new value if so, otherwise return null.  */
>  
> @@ -1483,18 +1433,21 @@ loop_versioning::analyze_blocks ()
>     LOOP.  */
>  
>  void
> -loop_versioning::prune_loop_conditions (class loop *loop, vr_values *vrs)
> +loop_versioning::prune_loop_conditions (class loop *loop)
>  {
>    loop_info &li = get_loop_info (loop);
>  
>    int to_remove = -1;
>    bitmap_iterator bi;
>    unsigned int i;
> +  int_range_max r;
>    EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
>      {
>        tree name = ssa_name (i);
> -      const value_range_equiv *vr = vrs->get_value_range (name);
> -      if (vr && !vr->may_contain_p (build_one_cst (TREE_TYPE (name))))
> +      gimple *stmt = first_stmt (loop->header);
> +
> +      if (get_range_query (cfun)->range_of_expr (r, name, stmt)
> +	  && !r.contains_p (build_one_cst (TREE_TYPE (name))))
>  	{
>  	  if (dump_enabled_p ())
>  	    dump_printf_loc (MSG_NOTE, find_loop_location (loop),
> @@ -1519,9 +1472,11 @@ loop_versioning::prune_conditions ()
>    AUTO_DUMP_SCOPE ("prune_loop_conditions",
>  		   dump_user_location_t::from_function_decl (m_fn->decl));
>  
> -  calculate_dominance_info (CDI_DOMINATORS);
> -  lv_dom_walker dom_walker (*this);
> -  dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn));
> +  basic_block bb;
> +  FOR_EACH_BB_FN (bb, m_fn)
> +    if (bb == bb->loop_father->header)
> +      prune_loop_conditions (bb->loop_father);

If we were going to keep pruning as a separate step, I think we should
iterate over loops rather than blocks.

However, what I meant by;

>> >> If the information is available at
>> >> version_for_unity (I guess it is) then we should just avoid recording
>> >> the versioning there if so.

is that we should instead put the get_range_query (cfun)->range_of_expr
and !r.contains_p test…

------------------------------------------------------------------------
void
loop_versioning::version_for_unity (gimple *stmt, tree name)
{
  class loop *loop = loop_containing_stmt (stmt);
  loop_info &li = get_loop_info (loop);

…here
  if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name)))
------------------------------------------------------------------------

and report that the value can't be 1 at that point.  There would then
be no need for a separate pruning step.  Having this range information
on tap makes the pass much simpler than it used to be. :-)

FAOD, I think it would be good to keep using first_stmt (loop->header)
(as in your patch) rather than use the stmt argument to version_for_unity.

Thanks,
Richard


> +
>    return m_num_conditions != 0;
>  }
>  
> @@ -1810,7 +1765,10 @@ pass_loop_versioning::execute (function *fn)
>    if (number_of_loops (fn) <= 1)
>      return 0;
>  
> -  return loop_versioning (fn).run ();
> +  enable_ranger (fn);
> +  unsigned int ret = loop_versioning (fn).run ();
> +  disable_ranger (fn);
> +  return ret;
>  }
>  
>  } // anon namespace
Aldy Hernandez July 30, 2021, 9:34 a.m. UTC | #7
On 7/30/21 10:39 AM, Richard Sandiford wrote:
> Aldy Hernandez <aldyh@redhat.com> writes:
>> On Mon, Jul 26, 2021 at 7:28 PM Richard Sandiford
>> <richard.sandiford@arm.com> wrote:
>>>
>>> Aldy Hernandez <aldyh@redhat.com> writes:
>>>> On Mon, Jul 26, 2021 at 4:18 PM Richard Sandiford
>>>> <richard.sandiford@arm.com> wrote:
>>>>>
>>>>> Aldy Hernandez <aldyh@redhat.com> writes:
>>>>>> This patch replaces the evrp_range_analyzer in the loop versioning code
>>>>>> with an on-demand ranger.
>>>>>>
>>>>>> Everything was pretty straightforward, except that range_of_expr requires
>>>>>> a gimple statement as context to provide context aware ranges.  I didn't see
>>>>>> a convient place where the statement was saved, so I made a vector indexed
>>>>>> by SSA names.  As an alternative, I tried to use the loop's first statement,
>>>>>> but that proved to be insufficient.
>>>>>
>>>>> The mapping is one-to-many though: there can be multiple statements
>>>>> for each SSA name.  Maybe that doesn't matter in this context and
>>>>> any of the statements can act as a representative.
>>>>>
>>>>> I'm surprised that the loop's first statement didn't work though,
>>>>> since the SSA name is supposedly known to be loop-invariant.  What went
>>>>> wrong when you tried that?
>>>>
>>>> I was looking at the first statement of loop_info->block_list and one
>>>> of the dg.exp=loop-versioning* tests failed.  Perhaps I should have
>>>> used the loop itself, as in the attached patch.  With this patch all
>>>> of the loop-versioning tests pass.
>>>>
>>>>>
>>>>>> I am not familiar with loop versioning, but if the DOM walk was only
>>>>>> necessary for the calls to record_ranges_from_stmt, this too could be
>>>>>> removed as the ranger will work without it.
>>>>>
>>>>> Yeah, that was the only reason.  If the information is available at
>>>>> version_for_unity (I guess it is) then we should just avoid recording
>>>>> the versioning there if so.
>>>>>
>>>>> How expensive is the check?  If the result is worth caching, perhaps
>>>>> we should have two bitmaps: the existing one, and one that records
>>>>> whether we've checked a particular SSA name.
>>>>>
>>>>> If the check is relatively cheap then that won't be worth it though.
>>>>
>>>> If you're asking about the range_of_expr check, that's all cached, so
>>>> it should be pretty cheap.  Besides, we're no longer calculating
>>>> ranges for each statement in the IL, as we were doing in lv_dom_walker
>>>> with evrp's record_ranges_from_stmt.  Only statements of interest are
>>>> queried.
>>>
>>> Sounds good.  If the results are already cached then another level
>>> of caching (via the second bitmap I mentioned above) would obviously
>>> be a waste of time.
>>
>> My callgrind harness for performance testing wasn't able to pick up
>> enough samples to measure the time spent in
>> pass_loop_versioning::execute.  I've seen this happen before with
>> passes that run too fast.  I'm afraid I don't have enough cycles to
>> continue working on this.
> 
> Yeah, any testing of this was above and beyond IMO.  Hearing that the
> range query does its own caching was enough for me. :-)
> 
>>>> How about this patch, pending tests?
>>>
>>> OK, thanks, as a strict improvement over the status quo.  But it'd be
>>> even better without the dom walk :-)
>>
>> I've removed the DOM walk, and re-tested.
>>
>> OK to push?
> 
> Sorry for asking for another iteration, but…

It looks like this is a bit more involved than I originally envisioned.

I've pushed the original (approved) patch that just removes the use of 
evrp, which was my main goal.

I'll follow-up with the dom walk removal and your suggested changes next 
week when I have more cycles.

Aldy
diff mbox series

Patch

diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
index 4b70c5a4aab..987994d4995 100644
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -30,19 +30,17 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-loop.h"
 #include "ssa.h"
 #include "tree-scalar-evolution.h"
-#include "tree-chrec.h"
 #include "tree-ssa-loop-ivopts.h"
 #include "fold-const.h"
 #include "tree-ssa-propagate.h"
 #include "tree-inline.h"
 #include "domwalk.h"
-#include "alloc-pool.h"
-#include "vr-values.h"
-#include "gimple-ssa-evrp-analyze.h"
 #include "tree-vectorizer.h"
 #include "omp-general.h"
 #include "predict.h"
 #include "tree-into-ssa.h"
+#include "gimple-range.h"
+#include "tree-cfg.h"
 
 namespace {
 
@@ -261,14 +259,10 @@  private:
     lv_dom_walker (loop_versioning &);
 
     edge before_dom_children (basic_block) FINAL OVERRIDE;
-    void after_dom_children (basic_block) FINAL OVERRIDE;
 
   private:
     /* The parent pass.  */
     loop_versioning &m_lv;
-
-    /* Used to build context-dependent range information.  */
-    evrp_range_analyzer m_range_analyzer;
   };
 
   /* Used to simplify statements based on conditions that are established
@@ -308,7 +302,7 @@  private:
   bool analyze_block (basic_block);
   bool analyze_blocks ();
 
-  void prune_loop_conditions (class loop *, vr_values *);
+  void prune_loop_conditions (class loop *);
   bool prune_conditions ();
 
   void merge_loop_info (class loop *, class loop *);
@@ -355,6 +349,9 @@  private:
 
   /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order.  */
   auto_vec <address_info *, 32> m_address_list;
+
+  /* Gimple context for each SSA in loop_info::unity_names.  */
+  auto_vec <gimple *> m_ssa_context;
 };
 
 /* If EXPR is an SSA name and not a default definition, return the
@@ -500,7 +497,7 @@  loop_info::worth_versioning_p () const
 }
 
 loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
-  : dom_walker (CDI_DOMINATORS), m_lv (lv), m_range_analyzer (false)
+  : dom_walker (CDI_DOMINATORS), m_lv (lv)
 {
 }
 
@@ -509,26 +506,12 @@  loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
 edge
 loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
 {
-  m_range_analyzer.enter (bb);
-
   if (bb == bb->loop_father->header)
-    m_lv.prune_loop_conditions (bb->loop_father, &m_range_analyzer);
-
-  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
-       gsi_next (&si))
-    m_range_analyzer.record_ranges_from_stmt (gsi_stmt (si), false);
+    m_lv.prune_loop_conditions (bb->loop_father);
 
   return NULL;
 }
 
-/* Process BB after processing the blocks it dominates.  */
-
-void
-loop_versioning::lv_dom_walker::after_dom_children (basic_block bb)
-{
-  m_range_analyzer.leave (bb);
-}
-
 /* Decide whether to replace VAL with a new value in a versioned loop.
    Return the new value if so, otherwise return null.  */
 
@@ -549,6 +532,7 @@  loop_versioning::loop_versioning (function *fn)
     m_num_conditions (0),
     m_address_table (31)
 {
+  m_ssa_context.safe_grow (num_ssa_names);
   bitmap_obstack_initialize (&m_bitmap_obstack);
   gcc_obstack_init (&m_obstack);
 
@@ -644,6 +628,8 @@  loop_versioning::version_for_unity (gimple *stmt, tree name)
       if (loop_depth (li.outermost) < loop_depth (outermost))
 	li.outermost = outermost;
 
+      m_ssa_context[SSA_NAME_VERSION(name)] = stmt;
+
       if (dump_enabled_p ())
 	{
 	  dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop"
@@ -1483,18 +1469,20 @@  loop_versioning::analyze_blocks ()
    LOOP.  */
 
 void
-loop_versioning::prune_loop_conditions (class loop *loop, vr_values *vrs)
+loop_versioning::prune_loop_conditions (class loop *loop)
 {
   loop_info &li = get_loop_info (loop);
 
   int to_remove = -1;
   bitmap_iterator bi;
   unsigned int i;
+  int_range_max r;
   EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
     {
       tree name = ssa_name (i);
-      const value_range_equiv *vr = vrs->get_value_range (name);
-      if (vr && !vr->may_contain_p (build_one_cst (TREE_TYPE (name))))
+      gimple *stmt = m_ssa_context[SSA_NAME_VERSION (name)];
+      if (get_range_query (cfun)->range_of_expr (r, name, stmt)
+	  && !r.contains_p (build_one_cst (TREE_TYPE (name))))
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_NOTE, find_loop_location (loop),
@@ -1810,7 +1798,10 @@  pass_loop_versioning::execute (function *fn)
   if (number_of_loops (fn) <= 1)
     return 0;
 
-  return loop_versioning (fn).run ();
+  enable_ranger (fn);
+  unsigned int ret = loop_versioning (fn).run ();
+  disable_ranger (fn);
+  return ret;
 }
 
 } // anon namespace