diff mbox

loop-ch tweek

Message ID 20160605193557.GA87278@kam.mff.cuni.cz
State New
Headers show

Commit Message

Jan Hubicka June 5, 2016, 7:35 p.m. UTC
Hi,
both loop-ch and loop-ivcanon want to trottle down the heuristics on paths
containing call. Testing for presence of GIMPLE_CALL is wrong for internal
call and cheap builtins that are expanded inline.

Bootstrapped/regtested x86_64-linux, OK?

Honza

	* gimple.c: Include builtins.h.
	(gimple_real_call_p): New function.
	* gimple.h (gimple_real_call_p): Declare.
	* tree-ssa-loop-ch.c (should_duplicate_loop_header_p): Use it.
	* tree-ssa-loop-ivcanon.c (tree_estimate_loop_size): Likewise.

Comments

Richard Biener June 6, 2016, 7:32 a.m. UTC | #1
On Sun, 5 Jun 2016, Jan Hubicka wrote:

> Hi,
> both loop-ch and loop-ivcanon want to trottle down the heuristics on paths
> containing call. Testing for presence of GIMPLE_CALL is wrong for internal
> call and cheap builtins that are expanded inline.
> 
> Bootstrapped/regtested x86_64-linux, OK?

First of all the name is bad - I'd say gimple_inexpensive_call_p ()
is better.  More comments below.

> Honza
> 
> 	* gimple.c: Include builtins.h.
> 	(gimple_real_call_p): New function.
> 	* gimple.h (gimple_real_call_p): Declare.
> 	* tree-ssa-loop-ch.c (should_duplicate_loop_header_p): Use it.
> 	* tree-ssa-loop-ivcanon.c (tree_estimate_loop_size): Likewise.
> Index: gimple.c
> ===================================================================
> --- gimple.c	(revision 237101)
> +++ gimple.c	(working copy)
> @@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.
>  #include "gimple-walk.h"
>  #include "gimplify.h"
>  #include "target.h"
> +#include "builtins.h"
>  
>  
>  /* All the tuples have their operand vector (if present) at the very bottom
> @@ -3018,3 +3019,20 @@ maybe_remove_unused_call_args (struct fu
>        update_stmt_fn (fn, stmt);
>      }
>  }
> +
> +/* Return true if STMT will likely expand to real call statment.  */
> +
> +bool
> +gimple_real_call_p (gimple *stmt)
> +{
> +  if (gimple_code (stmt) != GIMPLE_CALL)
> +    return false;
> +  if (gimple_call_internal_p (stmt))
> +    return false;
> +  tree decl = gimple_call_fndecl (stmt);
> +  if (decl && DECL_IS_BUILTIN (decl)
> +      && (is_simple_builtin (decl)
> +	  || is_inexpensive_builtin (decl)))

is_inexpensive_builtin includes is_simple_builtin handling and also
tests decl && DECL_IS_BUILTIN properly already.

> +    return false;
> +  return true;
> +}
> Index: gimple.h
> ===================================================================
> --- gimple.h	(revision 237101)
> +++ gimple.h	(working copy)
> @@ -1525,6 +1525,7 @@ extern void preprocess_case_label_vec_fo
>  extern void gimple_seq_set_location (gimple_seq, location_t);
>  extern void gimple_seq_discard (gimple_seq);
>  extern void maybe_remove_unused_call_args (struct function *, gimple *);
> +extern bool gimple_real_call_p (gimple *);
>  
>  /* Formal (expression) temporary table handling: multiple occurrences of
>     the same scalar expression are evaluated into the same temporary.  */
> Index: tree-ssa-loop-ch.c
> ===================================================================
> --- tree-ssa-loop-ch.c	(revision 237101)
> +++ tree-ssa-loop-ch.c	(working copy)
> @@ -118,7 +118,7 @@ should_duplicate_loop_header_p (basic_bl
>        if (is_gimple_debug (last))
>  	continue;
>  
> -      if (is_gimple_call (last))
> +      if (gimple_real_call_p (last))
>  	{
>  	  if (dump_file && (dump_flags & TDF_DETAILS))
>  	    fprintf (dump_file,
> Index: tree-ssa-loop-ivcanon.c
> ===================================================================
> --- tree-ssa-loop-ivcanon.c	(revision 237101)
> +++ tree-ssa-loop-ivcanon.c	(working copy)
> @@ -339,15 +339,11 @@ tree_estimate_loop_size (struct loop *lo
>        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>  	{
>  	  gimple *stmt = gsi_stmt (gsi);
> -	  if (gimple_code (stmt) == GIMPLE_CALL)
> +	  if (gimple_code (stmt) == GIMPLE_CALL
> +	      && gimple_real_call_p (stmt))

gimple_code (stmt) == GIMPLE_CALL is redundant then.  I'd prefer to
make gimple_inexpensive_call_p take a gcall * argument and do the
test at the callers though.

>  	    {
>  	      int flags = gimple_call_flags (stmt);
> -	      tree decl = gimple_call_fndecl (stmt);
> -
> -	      if (decl && DECL_IS_BUILTIN (decl)
> -		  && is_inexpensive_builtin (decl))
> -		;
> -	      else if (flags & (ECF_PURE | ECF_CONST))
> +	      if (flags & (ECF_PURE | ECF_CONST))
>  		size->num_pure_calls_on_hot_path++;
>  	      else
>  		size->num_non_pure_calls_on_hot_path++;
> 
>
Jan Hubicka June 6, 2016, 10:04 a.m. UTC | #2
> On Sun, 5 Jun 2016, Jan Hubicka wrote:
> 
> > Hi,
> > both loop-ch and loop-ivcanon want to trottle down the heuristics on paths
> > containing call. Testing for presence of GIMPLE_CALL is wrong for internal
> > call and cheap builtins that are expanded inline.
> > 
> > Bootstrapped/regtested x86_64-linux, OK?
> 
> First of all the name is bad - I'd say gimple_inexpensive_call_p ()
> is better.  More comments below.

OK, the motivation for name is that I am really testing if the GIMPLE_CALL will end up
call instruction in the final assembly. No matter whetehr expensive or not.
> 
> gimple_code (stmt) == GIMPLE_CALL is redundant then.  I'd prefer to
> make gimple_inexpensive_call_p take a gcall * argument and do the
> test at the callers though.

OK, i will update patch.  I had mostly copied those tests from original
code which I think had them as short cirucuits. This most probalby does not
matter in practice and LTO may be eventually to do that for us. So it seemed
bit like premature optimization.  I will update the patch.

Honza
> 
> >  	    {
> >  	      int flags = gimple_call_flags (stmt);
> > -	      tree decl = gimple_call_fndecl (stmt);
> > -
> > -	      if (decl && DECL_IS_BUILTIN (decl)
> > -		  && is_inexpensive_builtin (decl))
> > -		;
> > -	      else if (flags & (ECF_PURE | ECF_CONST))
> > +	      if (flags & (ECF_PURE | ECF_CONST))
> >  		size->num_pure_calls_on_hot_path++;
> >  	      else
> >  		size->num_non_pure_calls_on_hot_path++;
> > 
> >
Richard Biener June 6, 2016, 10:21 a.m. UTC | #3
On Mon, 6 Jun 2016, Jan Hubicka wrote:

> > On Sun, 5 Jun 2016, Jan Hubicka wrote:
> > 
> > > Hi,
> > > both loop-ch and loop-ivcanon want to trottle down the heuristics on paths
> > > containing call. Testing for presence of GIMPLE_CALL is wrong for internal
> > > call and cheap builtins that are expanded inline.
> > > 
> > > Bootstrapped/regtested x86_64-linux, OK?
> > 
> > First of all the name is bad - I'd say gimple_inexpensive_call_p ()
> > is better.  More comments below.
> 
> OK, the motivation for name is that I am really testing if the GIMPLE_CALL will end up
> call instruction in the final assembly. No matter whetehr expensive or not.

Well, but that's not what your predicate tests ;)  For example
CLZ is considered is_inexpensive_builtin even though it may end up
as a call.  In fact even non-calls can end up as a libcall on
some targets.

> > gimple_code (stmt) == GIMPLE_CALL is redundant then.  I'd prefer to
> > make gimple_inexpensive_call_p take a gcall * argument and do the
> > test at the callers though.
> 
> OK, i will update patch.  I had mostly copied those tests from original
> code which I think had them as short cirucuits. This most probalby does not
> matter in practice and LTO may be eventually to do that for us. So it seemed
> bit like premature optimization.  I will update the patch.

Thanks,
Richard.

> Honza
> > 
> > >  	    {
> > >  	      int flags = gimple_call_flags (stmt);
> > > -	      tree decl = gimple_call_fndecl (stmt);
> > > -
> > > -	      if (decl && DECL_IS_BUILTIN (decl)
> > > -		  && is_inexpensive_builtin (decl))
> > > -		;
> > > -	      else if (flags & (ECF_PURE | ECF_CONST))
> > > +	      if (flags & (ECF_PURE | ECF_CONST))
> > >  		size->num_pure_calls_on_hot_path++;
> > >  	      else
> > >  		size->num_non_pure_calls_on_hot_path++;
> > > 
> > > 
> 
>
Jan Hubicka June 6, 2016, 12:33 p.m. UTC | #4
> On Mon, 6 Jun 2016, Jan Hubicka wrote:
> 
> > > On Sun, 5 Jun 2016, Jan Hubicka wrote:
> > > 
> > > > Hi,
> > > > both loop-ch and loop-ivcanon want to trottle down the heuristics on paths
> > > > containing call. Testing for presence of GIMPLE_CALL is wrong for internal
> > > > call and cheap builtins that are expanded inline.
> > > > 
> > > > Bootstrapped/regtested x86_64-linux, OK?
> > > 
> > > First of all the name is bad - I'd say gimple_inexpensive_call_p ()
> > > is better.  More comments below.
> > 
> > OK, the motivation for name is that I am really testing if the GIMPLE_CALL will end up
> > call instruction in the final assembly. No matter whetehr expensive or not.
> 
> Well, but that's not what your predicate tests ;)  For example
> CLZ is considered is_inexpensive_builtin even though it may end up
> as a call.  In fact even non-calls can end up as a libcall on
> some targets.

Yep, it is for heuristics estimating the runtime cost of the given path,
(number of branches and number of real calls). It doesn't need to be precise
but it would be better if it was.  

The general intutition that call within loop probalby makes the loop
uninteresting for expensive loop transforms seems kind of sound (i.e. I do not
know of counterexamples) even through it is not a real rocket science.

If I make gimple_inexpensive_call_p then the test would be
  if (gimple_code (stmt) == GIMPLE_CALL && !gimple_inexpensive_call_p (stmt))
    ... account that call is evil ...

Does that look OK?

BTW the hard-wired bound of 20 insns for header copying seems high. I will turn
it into --param and we probably could check what value is really needed.  I don't
think it was tested since it was moved away from jump.c which worked on quite
different context.

Honza
> 
> > > gimple_code (stmt) == GIMPLE_CALL is redundant then.  I'd prefer to
> > > make gimple_inexpensive_call_p take a gcall * argument and do the
> > > test at the callers though.
> > 
> > OK, i will update patch.  I had mostly copied those tests from original
> > code which I think had them as short cirucuits. This most probalby does not
> > matter in practice and LTO may be eventually to do that for us. So it seemed
> > bit like premature optimization.  I will update the patch.
> 
> Thanks,
> Richard.
> 
> > Honza
> > > 
> > > >  	    {
> > > >  	      int flags = gimple_call_flags (stmt);
> > > > -	      tree decl = gimple_call_fndecl (stmt);
> > > > -
> > > > -	      if (decl && DECL_IS_BUILTIN (decl)
> > > > -		  && is_inexpensive_builtin (decl))
> > > > -		;
> > > > -	      else if (flags & (ECF_PURE | ECF_CONST))
> > > > +	      if (flags & (ECF_PURE | ECF_CONST))
> > > >  		size->num_pure_calls_on_hot_path++;
> > > >  	      else
> > > >  		size->num_non_pure_calls_on_hot_path++;
> > > > 
> > > > 
> > 
> > 
> 
> -- 
> Richard Biener <rguenther@suse.de>
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
diff mbox

Patch

Index: gimple.c
===================================================================
--- gimple.c	(revision 237101)
+++ gimple.c	(working copy)
@@ -38,6 +38,7 @@  along with GCC; see the file COPYING3.
 #include "gimple-walk.h"
 #include "gimplify.h"
 #include "target.h"
+#include "builtins.h"
 
 
 /* All the tuples have their operand vector (if present) at the very bottom
@@ -3018,3 +3019,20 @@  maybe_remove_unused_call_args (struct fu
       update_stmt_fn (fn, stmt);
     }
 }
+
+/* Return true if STMT will likely expand to real call statment.  */
+
+bool
+gimple_real_call_p (gimple *stmt)
+{
+  if (gimple_code (stmt) != GIMPLE_CALL)
+    return false;
+  if (gimple_call_internal_p (stmt))
+    return false;
+  tree decl = gimple_call_fndecl (stmt);
+  if (decl && DECL_IS_BUILTIN (decl)
+      && (is_simple_builtin (decl)
+	  || is_inexpensive_builtin (decl)))
+    return false;
+  return true;
+}
Index: gimple.h
===================================================================
--- gimple.h	(revision 237101)
+++ gimple.h	(working copy)
@@ -1525,6 +1525,7 @@  extern void preprocess_case_label_vec_fo
 extern void gimple_seq_set_location (gimple_seq, location_t);
 extern void gimple_seq_discard (gimple_seq);
 extern void maybe_remove_unused_call_args (struct function *, gimple *);
+extern bool gimple_real_call_p (gimple *);
 
 /* Formal (expression) temporary table handling: multiple occurrences of
    the same scalar expression are evaluated into the same temporary.  */
Index: tree-ssa-loop-ch.c
===================================================================
--- tree-ssa-loop-ch.c	(revision 237101)
+++ tree-ssa-loop-ch.c	(working copy)
@@ -118,7 +118,7 @@  should_duplicate_loop_header_p (basic_bl
       if (is_gimple_debug (last))
 	continue;
 
-      if (is_gimple_call (last))
+      if (gimple_real_call_p (last))
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    fprintf (dump_file,
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 237101)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -339,15 +339,11 @@  tree_estimate_loop_size (struct loop *lo
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 	{
 	  gimple *stmt = gsi_stmt (gsi);
-	  if (gimple_code (stmt) == GIMPLE_CALL)
+	  if (gimple_code (stmt) == GIMPLE_CALL
+	      && gimple_real_call_p (stmt))
 	    {
 	      int flags = gimple_call_flags (stmt);
-	      tree decl = gimple_call_fndecl (stmt);
-
-	      if (decl && DECL_IS_BUILTIN (decl)
-		  && is_inexpensive_builtin (decl))
-		;
-	      else if (flags & (ECF_PURE | ECF_CONST))
+	      if (flags & (ECF_PURE | ECF_CONST))
 		size->num_pure_calls_on_hot_path++;
 	      else
 		size->num_non_pure_calls_on_hot_path++;