diff mbox series

middle-end: add two debug counters for early-break vectorization debugging

Message ID patch-18254-tamar@arm.com
State New
Headers show
Series middle-end: add two debug counters for early-break vectorization debugging | expand

Commit Message

Tamar Christina Feb. 8, 2024, 1:47 p.m. UTC
Hi All,

This adds two new debug counter to aid in debugging early break code.

- vect_force_last_exit: when reached will always force the final loop exit.
- vect_skip_exit: when reached will skip selecting the current candidate exit
		  as the loop exit.

The first counter essentially allows you to turn off the PEELED case and the
second counter to pick a different exit, which may mean you pick no exit at
all.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* dbgcnt.def (vect_force_last_exit, vect_skip_exit): New.
	* tree-vect-loop.cc (vec_init_loop_exit_info): Use them.

--- inline copy of patch -- 
diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index ed9f062eac2c28c52df76b39d4312dd9fde1c800..8f7bebf93fceabdf6ae86c2df591eae4848b8a5c 100644




--
diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index ed9f062eac2c28c52df76b39d4312dd9fde1c800..8f7bebf93fceabdf6ae86c2df591eae4848b8a5c 100644
--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -213,5 +213,7 @@ DEBUG_COUNTER (stv_conversion)
 DEBUG_COUNTER (tail_call)
 DEBUG_COUNTER (tree_sra)
 DEBUG_COUNTER (treepre_insert)
+DEBUG_COUNTER (vect_force_last_exit)
 DEBUG_COUNTER (vect_loop)
+DEBUG_COUNTER (vect_skip_exit)
 DEBUG_COUNTER (vect_slp)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index eed2268e9bae7e7ad36d13da03e0b54eab26ef6f..854e9d78bc71721e6559a6bc5dff78c813603a78 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-eh.h"
 #include "case-cfn-macros.h"
 #include "langhooks.h"
+#include "dbgcnt.h"
 
 /* Loop Vectorization Pass.
 
@@ -977,6 +978,20 @@ vec_init_loop_exit_info (class loop *loop)
   if (exits.length () == 1)
     return exits[0];
 
+  /* Check to see if we've been asked to force the last exit.  */
+  if (!dbg_cnt (vect_force_last_exit))
+    {
+      basic_block bb = ip_normal_pos (loop);
+      if (!bb)
+	return NULL;
+
+      edge exit = EDGE_SUCC (bb, 0);
+      if (exit->dest == loop->latch)
+	return EDGE_SUCC (bb, 1);
+
+      return exit;
+    }
+
   /* If we have multiple exits we only support counting IV at the moment.
      Analyze all exits and return the last one we can analyze.  */
   class tree_niter_desc niter_desc;
@@ -998,6 +1013,7 @@ vec_init_loop_exit_info (class loop *loop)
 		   && exit->src == single_pred (loop->latch)
 		   && (integer_nonzerop (may_be_zero)
 		       || COMPARISON_CLASS_P (may_be_zero))))
+	      && dbg_cnt (vect_skip_exit)
 	      && (!candidate
 		  || dominated_by_p (CDI_DOMINATORS, exit->src,
 				     candidate->src)))

Comments

Richard Biener Feb. 8, 2024, 2:15 p.m. UTC | #1
On Thu, 8 Feb 2024, Tamar Christina wrote:

> Hi All,
> 
> This adds two new debug counter to aid in debugging early break code.
> 
> - vect_force_last_exit: when reached will always force the final loop exit.
> - vect_skip_exit: when reached will skip selecting the current candidate exit
> 		  as the loop exit.
> 
> The first counter essentially allows you to turn off the PEELED case and the
> second counter to pick a different exit, which may mean you pick no exit at
> all.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* dbgcnt.def (vect_force_last_exit, vect_skip_exit): New.
> 	* tree-vect-loop.cc (vec_init_loop_exit_info): Use them.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
> index ed9f062eac2c28c52df76b39d4312dd9fde1c800..8f7bebf93fceabdf6ae86c2df591eae4848b8a5c 100644
> --- a/gcc/dbgcnt.def
> +++ b/gcc/dbgcnt.def
> @@ -213,5 +213,7 @@ DEBUG_COUNTER (stv_conversion)
>  DEBUG_COUNTER (tail_call)
>  DEBUG_COUNTER (tree_sra)
>  DEBUG_COUNTER (treepre_insert)
> +DEBUG_COUNTER (vect_force_last_exit)
>  DEBUG_COUNTER (vect_loop)
> +DEBUG_COUNTER (vect_skip_exit)
>  DEBUG_COUNTER (vect_slp)
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index eed2268e9bae7e7ad36d13da03e0b54eab26ef6f..854e9d78bc71721e6559a6bc5dff78c813603a78 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-eh.h"
>  #include "case-cfn-macros.h"
>  #include "langhooks.h"
> +#include "dbgcnt.h"
>  
>  /* Loop Vectorization Pass.
>  
> @@ -977,6 +978,20 @@ vec_init_loop_exit_info (class loop *loop)
>    if (exits.length () == 1)
>      return exits[0];
>  
> +  /* Check to see if we've been asked to force the last exit.  */
> +  if (!dbg_cnt (vect_force_last_exit))
> +    {
> +      basic_block bb = ip_normal_pos (loop);
> +      if (!bb)
> +	return NULL;
> +
> +      edge exit = EDGE_SUCC (bb, 0);
> +      if (exit->dest == loop->latch)
> +	return EDGE_SUCC (bb, 1);
> +
> +      return exit;

Err, that's quite odd.  Why not just below do

> +    }
> +
>    /* If we have multiple exits we only support counting IV at the moment.
>       Analyze all exits and return the last one we can analyze.  */
>    class tree_niter_desc niter_desc;
> @@ -998,6 +1013,7 @@ vec_init_loop_exit_info (class loop *loop)
>  		   && exit->src == single_pred (loop->latch)
>  		   && (integer_nonzerop (may_be_zero)
>  		       || COMPARISON_CLASS_P (may_be_zero))))
> +	      && dbg_cnt (vect_skip_exit)

  && (dbg_cnt (vect_force_last_exit)
      || exit->src == single_pred (loop->latch))

(also computed above already)?  It's also oddly named, it's more like 
vect_allow_peeled_exit or so.

It's also seemingly redundant with vect_skip_exit, no?

Note the counter gets incremented even if we'd not consider the exit
because we have a later candidate already.

I fear it's going to be quite random even with the debug counter.

Can you see whether it really helps you?

>  	      && (!candidate
>  		  || dominated_by_p (CDI_DOMINATORS, exit->src,
>  				     candidate->src)))
> 
> 
> 
> 
>
Tamar Christina Feb. 8, 2024, 2:41 p.m. UTC | #2
> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Thursday, February 8, 2024 2:16 PM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; jlaw@ventanamicro.com
> Subject: Re: [PATCH]middle-end: add two debug counters for early-break
> vectorization debugging
> 
> On Thu, 8 Feb 2024, Tamar Christina wrote:
> 
> > Hi All,
> >
> > This adds two new debug counter to aid in debugging early break code.
> >
> > - vect_force_last_exit: when reached will always force the final loop exit.
> > - vect_skip_exit: when reached will skip selecting the current candidate exit
> > 		  as the loop exit.
> >
> > The first counter essentially allows you to turn off the PEELED case and the
> > second counter to pick a different exit, which may mean you pick no exit at
> > all.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* dbgcnt.def (vect_force_last_exit, vect_skip_exit): New.
> > 	* tree-vect-loop.cc (vec_init_loop_exit_info): Use them.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
> > index
> ed9f062eac2c28c52df76b39d4312dd9fde1c800..8f7bebf93fceabdf6ae86c2df5
> 91eae4848b8a5c 100644
> > --- a/gcc/dbgcnt.def
> > +++ b/gcc/dbgcnt.def
> > @@ -213,5 +213,7 @@ DEBUG_COUNTER (stv_conversion)
> >  DEBUG_COUNTER (tail_call)
> >  DEBUG_COUNTER (tree_sra)
> >  DEBUG_COUNTER (treepre_insert)
> > +DEBUG_COUNTER (vect_force_last_exit)
> >  DEBUG_COUNTER (vect_loop)
> > +DEBUG_COUNTER (vect_skip_exit)
> >  DEBUG_COUNTER (vect_slp)
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index
> eed2268e9bae7e7ad36d13da03e0b54eab26ef6f..854e9d78bc71721e6559a6bc
> 5dff78c813603a78 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "tree-eh.h"
> >  #include "case-cfn-macros.h"
> >  #include "langhooks.h"
> > +#include "dbgcnt.h"
> >
> >  /* Loop Vectorization Pass.
> >
> > @@ -977,6 +978,20 @@ vec_init_loop_exit_info (class loop *loop)
> >    if (exits.length () == 1)
> >      return exits[0];
> >
> > +  /* Check to see if we've been asked to force the last exit.  */
> > +  if (!dbg_cnt (vect_force_last_exit))
> > +    {
> > +      basic_block bb = ip_normal_pos (loop);
> > +      if (!bb)
> > +	return NULL;
> > +
> > +      edge exit = EDGE_SUCC (bb, 0);
> > +      if (exit->dest == loop->latch)
> > +	return EDGE_SUCC (bb, 1);
> > +
> > +      return exit;
> 
> Err, that's quite odd.  Why not just below do
> 
> > +    }
> > +
> >    /* If we have multiple exits we only support counting IV at the moment.
> >       Analyze all exits and return the last one we can analyze.  */
> >    class tree_niter_desc niter_desc;
> > @@ -998,6 +1013,7 @@ vec_init_loop_exit_info (class loop *loop)
> >  		   && exit->src == single_pred (loop->latch)
> >  		   && (integer_nonzerop (may_be_zero)
> >  		       || COMPARISON_CLASS_P (may_be_zero))))
> > +	      && dbg_cnt (vect_skip_exit)
> 
>   && (dbg_cnt (vect_force_last_exit)
>       || exit->src == single_pred (loop->latch))
> 
> (also computed above already)?  It's also oddly named, it's more like
> vect_allow_peeled_exit or so.

Because this isn't deterministic. If a loop has n exits the above always forces
you to pick the final one regardless of n, rather than just skip consideration of an exit.

And in that case is there a point in analyzing all the exits just to throw away the information?

Doing in inside the consideration check would only skip one exit unless I'm misunderstanding.

> 
> It's also seemingly redundant with vect_skip_exit, no?
> 
> Note the counter gets incremented even if we'd not consider the exit
> because we have a later candidate already.
> 
> I fear it's going to be quite random even with the debug counter.

It is, I think the first counter is more useful. But in general the reason I kept the second counter
which kinda does what was suggested in the RFC I sent before was that it should in theory at
least allow us to test forcing of a PEELED case. Since we generally prefer the non-PEELED case
if possible.

At least that was the intention.

Thanks,
Tamar

> 
> Can you see whether it really helps you?
> 
> >  	      && (!candidate
> >  		  || dominated_by_p (CDI_DOMINATORS, exit->src,
> >  				     candidate->src)))
> >
> >
> >
> >
> >
> 
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH,
> Frankenstrasse 146, 90461 Nuernberg, Germany;
> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
Richard Biener Feb. 8, 2024, 2:49 p.m. UTC | #3
On Thu, 8 Feb 2024, Tamar Christina wrote:

> > -----Original Message-----
> > From: Richard Biener <rguenther@suse.de>
> > Sent: Thursday, February 8, 2024 2:16 PM
> > To: Tamar Christina <Tamar.Christina@arm.com>
> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; jlaw@ventanamicro.com
> > Subject: Re: [PATCH]middle-end: add two debug counters for early-break
> > vectorization debugging
> > 
> > On Thu, 8 Feb 2024, Tamar Christina wrote:
> > 
> > > Hi All,
> > >
> > > This adds two new debug counter to aid in debugging early break code.
> > >
> > > - vect_force_last_exit: when reached will always force the final loop exit.
> > > - vect_skip_exit: when reached will skip selecting the current candidate exit
> > > 		  as the loop exit.
> > >
> > > The first counter essentially allows you to turn off the PEELED case and the
> > > second counter to pick a different exit, which may mean you pick no exit at
> > > all.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > >
> > > Ok for master?
> > >
> > > Thanks,
> > > Tamar
> > >
> > > gcc/ChangeLog:
> > >
> > > 	* dbgcnt.def (vect_force_last_exit, vect_skip_exit): New.
> > > 	* tree-vect-loop.cc (vec_init_loop_exit_info): Use them.
> > >
> > > --- inline copy of patch --
> > > diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
> > > index
> > ed9f062eac2c28c52df76b39d4312dd9fde1c800..8f7bebf93fceabdf6ae86c2df5
> > 91eae4848b8a5c 100644
> > > --- a/gcc/dbgcnt.def
> > > +++ b/gcc/dbgcnt.def
> > > @@ -213,5 +213,7 @@ DEBUG_COUNTER (stv_conversion)
> > >  DEBUG_COUNTER (tail_call)
> > >  DEBUG_COUNTER (tree_sra)
> > >  DEBUG_COUNTER (treepre_insert)
> > > +DEBUG_COUNTER (vect_force_last_exit)
> > >  DEBUG_COUNTER (vect_loop)
> > > +DEBUG_COUNTER (vect_skip_exit)
> > >  DEBUG_COUNTER (vect_slp)
> > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > > index
> > eed2268e9bae7e7ad36d13da03e0b54eab26ef6f..854e9d78bc71721e6559a6bc
> > 5dff78c813603a78 100644
> > > --- a/gcc/tree-vect-loop.cc
> > > +++ b/gcc/tree-vect-loop.cc
> > > @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
> > >  #include "tree-eh.h"
> > >  #include "case-cfn-macros.h"
> > >  #include "langhooks.h"
> > > +#include "dbgcnt.h"
> > >
> > >  /* Loop Vectorization Pass.
> > >
> > > @@ -977,6 +978,20 @@ vec_init_loop_exit_info (class loop *loop)
> > >    if (exits.length () == 1)
> > >      return exits[0];
> > >
> > > +  /* Check to see if we've been asked to force the last exit.  */
> > > +  if (!dbg_cnt (vect_force_last_exit))
> > > +    {
> > > +      basic_block bb = ip_normal_pos (loop);
> > > +      if (!bb)
> > > +	return NULL;
> > > +
> > > +      edge exit = EDGE_SUCC (bb, 0);
> > > +      if (exit->dest == loop->latch)
> > > +	return EDGE_SUCC (bb, 1);
> > > +
> > > +      return exit;
> > 
> > Err, that's quite odd.  Why not just below do
> > 
> > > +    }
> > > +
> > >    /* If we have multiple exits we only support counting IV at the moment.
> > >       Analyze all exits and return the last one we can analyze.  */
> > >    class tree_niter_desc niter_desc;
> > > @@ -998,6 +1013,7 @@ vec_init_loop_exit_info (class loop *loop)
> > >  		   && exit->src == single_pred (loop->latch)
> > >  		   && (integer_nonzerop (may_be_zero)
> > >  		       || COMPARISON_CLASS_P (may_be_zero))))
> > > +	      && dbg_cnt (vect_skip_exit)
> > 
> >   && (dbg_cnt (vect_force_last_exit)
> >       || exit->src == single_pred (loop->latch))
> > 
> > (also computed above already)?  It's also oddly named, it's more like
> > vect_allow_peeled_exit or so.
> 
> Because this isn't deterministic. If a loop has n exits the above always forces
> you to pick the final one regardless of n, rather than just skip consideration of an exit.

Well, you can always do (before the loop)

  bool force_last_exit = dbg_cnt (vect_force_last_exit);

to evaluate it only once.  I think for debug counters we should avoid
introducing more lines of code when possible  (the above big odd blob).

> And in that case is there a point in analyzing all the exits just to throw away the information?
> 
> Doing in inside the consideration check would only skip one exit unless I'm misunderstanding.
> 
> > 
> > It's also seemingly redundant with vect_skip_exit, no?
> > 
> > Note the counter gets incremented even if we'd not consider the exit
> > because we have a later candidate already.
> > 
> > I fear it's going to be quite random even with the debug counter.
> 
> It is, I think the first counter is more useful. But in general the reason I kept the second counter
> which kinda does what was suggested in the RFC I sent before was that it should in theory at
> least allow us to test forcing of a PEELED case. Since we generally prefer the non-PEELED case
> if possible.
> 
> At least that was the intention.

For the PEELED case I wondered if a --param vect-allow-early-exit-peeled
defaulted to 1 would make more sense.  Of course it doesn't do this
per loop.

> Thanks,
> Tamar
> 
> > 
> > Can you see whether it really helps you?
> > 
> > >  	      && (!candidate
> > >  		  || dominated_by_p (CDI_DOMINATORS, exit->src,
> > >  				     candidate->src)))
> > >
> > >
> > >
> > >
> > >
> > 
> > --
> > Richard Biener <rguenther@suse.de>
> > SUSE Software Solutions Germany GmbH,
> > Frankenstrasse 146, 90461 Nuernberg, Germany;
> > GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
>
diff mbox series

Patch

--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -213,5 +213,7 @@  DEBUG_COUNTER (stv_conversion)
 DEBUG_COUNTER (tail_call)
 DEBUG_COUNTER (tree_sra)
 DEBUG_COUNTER (treepre_insert)
+DEBUG_COUNTER (vect_force_last_exit)
 DEBUG_COUNTER (vect_loop)
+DEBUG_COUNTER (vect_skip_exit)
 DEBUG_COUNTER (vect_slp)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index eed2268e9bae7e7ad36d13da03e0b54eab26ef6f..854e9d78bc71721e6559a6bc5dff78c813603a78 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -58,6 +58,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-eh.h"
 #include "case-cfn-macros.h"
 #include "langhooks.h"
+#include "dbgcnt.h"
 
 /* Loop Vectorization Pass.
 
@@ -977,6 +978,20 @@  vec_init_loop_exit_info (class loop *loop)
   if (exits.length () == 1)
     return exits[0];
 
+  /* Check to see if we've been asked to force the last exit.  */
+  if (!dbg_cnt (vect_force_last_exit))
+    {
+      basic_block bb = ip_normal_pos (loop);
+      if (!bb)
+	return NULL;
+
+      edge exit = EDGE_SUCC (bb, 0);
+      if (exit->dest == loop->latch)
+	return EDGE_SUCC (bb, 1);
+
+      return exit;
+    }
+
   /* If we have multiple exits we only support counting IV at the moment.
      Analyze all exits and return the last one we can analyze.  */
   class tree_niter_desc niter_desc;
@@ -998,6 +1013,7 @@  vec_init_loop_exit_info (class loop *loop)
 		   && exit->src == single_pred (loop->latch)
 		   && (integer_nonzerop (may_be_zero)
 		       || COMPARISON_CLASS_P (may_be_zero))))
+	      && dbg_cnt (vect_skip_exit)
 	      && (!candidate
 		  || dominated_by_p (CDI_DOMINATORS, exit->src,
 				     candidate->src)))