diff mbox

ipa-cp heuristics fixes

Message ID 20151217181246.GV18720@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Dec. 17, 2015, 6:12 p.m. UTC
On Wed, Dec 16, 2015 at 08:15:12PM +0100, Jan Hubicka wrote:
> just to summarize a discussion on IRC. The problem is that we produce debug
> statements for eliminated arguments only in ipa-sra and ipa-split, while we
> don't do anything for cgraph clones. This is a problem on release branches,
> too.
> 
> It seems we have all the necessary logic, but the callee modification code from
> ipa-split should be moved to tree_function_versioning (which is used by both
> ipa-split and cgraph clone mechanizm) and caller modifcation copied to
> cgraph_edge::redirect_call_stmt_to_callee.
> 
> I am trying to do that. It seems bit difficult as the caller and callee
> modifications are tied together and I do not know how chaining of
> transfomraitons is going to work. 

Ok, so here is a WIP patch changing the functions you wanted, untested so
far.

I've been looking at 3 testcases (attached), -1.c and -3.c with -g -O2,
and -2.c with -g -O3.
The -3.c one is a copy of the test we have for the ipa-split debug info
stuff, before/after the patch we generate the same stuff.
-2.c testcase is for the (new or now much more often taken patch) of
ipa-cp, the patch arranges for proper debug info in that case
(but, I'm really surprised why when the function is already cloned, nothing
figures out that the clone is always called with the same constant
passed to the arg8 and the argument isn't removed and replaced by constant.
-1.c is a testcase for the IPA-SRA path, where we unfortunately end up with
-a slight regression (on the IL size, in the end we generate the same
assembly):
+  # DEBUG D#8 s=> arg8
+  # DEBUG arg8 => D#8
   # DEBUG arg8 => 7
with the patch.  On that testcase, arg8 is used, but it is always passed
value 7 (similarly to -2.c testcase) and in that case we really don't
need/want the decl_debug_args stuff, it is unnecessary, it is enough to say
in the callee that arg8 is 7.  Nothing on the caller side sets the magic
corresponding D# debug expr decl anyway.
Either tree_versioning is too low-level for the debug info addition, or
we need to figure out how to tell it if a constant will be always passed
to some argument and what that constant will be, so that we'd emit
always the # DEBUG arg8 => constant in that case instead of the source bind
stuff (but then figure out what has added that and avoid duplication too).

And then there is another thing, but best to be handled somewhere in
dwarf2out.c or in the debugger.  The arguments are printed in pretty random
order:
#0  foo (arg7=arg7@entry=30, arg8=arg8@entry=7, arg6=6, arg5=5, arg4=4, arg3=3, arg2=2, arg1=1) at pr68860-2.c:15
So, either the debugger for functions with abstract origins should look at
the order of arguments in the abstract origin and ignore order in the
particular instantiation, or dwarf2out.c should sort the
DW_TAG_formal_parameter such that it if at all possible matches the order
specified in the source.



	Jakub
/* PR debug/36728 */
/* { dg-do run } */
/* { dg-options "-g" } */

#define NOP "nop"

static int __attribute__((noinline))
foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int arg8)
{
  char *x = __builtin_alloca (arg7);
  int __attribute__ ((aligned(32))) y;

  y = 2;
  asm (NOP : "=m" (y) : "m" (y));
  x[0] = 25 + arg8;
  asm volatile (NOP : "=m" (x[0]) : "m" (x[0]));
  return y;
}

/* On s390(x) r2 and r3 are (depending on the optimization level) used
   when adjusting the addresses in order to meet the alignment
   requirements above.  They usually hold the function arguments arg1
   and arg2.  So it is expected that these values are unavailable in
   some of these tests.  */

/* { dg-final { gdb-test 14 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
/* { dg-final { gdb-test 14 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
/* { dg-final { gdb-test 14 "arg3" "3" } } */
/* { dg-final { gdb-test 14 "arg4" "4" } } */
/* { dg-final { gdb-test 14 "arg5" "5" } } */
/* { dg-final { gdb-test 14 "arg6" "6" } } */
/* { dg-final { gdb-test 14 "arg7" "30" } } */
/* { dg-final { gdb-test 14 "y" "2" } } */
/* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
/* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
/* { dg-final { gdb-test 16 "arg3" "3" } } */
/* { dg-final { gdb-test 16 "arg4" "4" } } */
/* { dg-final { gdb-test 16 "arg5" "5" } } */
/* { dg-final { gdb-test 16 "arg6" "6" } } */
/* { dg-final { gdb-test 16 "arg7" "30" } } */
/* { dg-final { gdb-test 16 "*x" "(char) 25" } } */
/* { dg-final { gdb-test 16 "y" "2" } } */

int q;

int
main ()
{
  int l = 0;
  asm ("" : "=r" (l) : "0" (l));
  foo (l + 1, l + 2, l + 3, l + 4, l + 5, l + 6, l + 30, 7);
  asm volatile ("" :: "r" (l));
  return 0;
}
/* PR debug/36728 */
/* { dg-do run } */
/* { dg-options "-g" } */

#define NOP "nop"

int __attribute__((noinline))
foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int arg8)
{
  char *x = __builtin_alloca (arg7);
  int __attribute__ ((aligned(32))) y;

  y = 2;
  asm (NOP : "=m" (y) : "m" (y));
  x[0] = 25 + arg8;
  asm volatile (NOP : "=m" (x[0]) : "m" (x[0]));
  return y;
}

/* On s390(x) r2 and r3 are (depending on the optimization level) used
   when adjusting the addresses in order to meet the alignment
   requirements above.  They usually hold the function arguments arg1
   and arg2.  So it is expected that these values are unavailable in
   some of these tests.  */

/* { dg-final { gdb-test 14 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
/* { dg-final { gdb-test 14 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
/* { dg-final { gdb-test 14 "arg3" "3" } } */
/* { dg-final { gdb-test 14 "arg4" "4" } } */
/* { dg-final { gdb-test 14 "arg5" "5" } } */
/* { dg-final { gdb-test 14 "arg6" "6" } } */
/* { dg-final { gdb-test 14 "arg7" "30" } } */
/* { dg-final { gdb-test 14 "y" "2" } } */
/* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
/* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
/* { dg-final { gdb-test 16 "arg3" "3" } } */
/* { dg-final { gdb-test 16 "arg4" "4" } } */
/* { dg-final { gdb-test 16 "arg5" "5" } } */
/* { dg-final { gdb-test 16 "arg6" "6" } } */
/* { dg-final { gdb-test 16 "arg7" "30" } } */
/* { dg-final { gdb-test 16 "*x" "(char) 25" } } */
/* { dg-final { gdb-test 16 "y" "2" } } */

int q;

int
main ()
{
  int l = 0;
  asm ("" : "=r" (l) : "0" (l));
  foo (l + 1, l + 2, l + 3, l + 4, l + 5, l + 6, l + 30, 7);
  asm volatile ("" :: "r" (l));
  return 0;
}
/* PR debug/54519 */
/* { dg-do run } */
/* { dg-options "-g" } */

__attribute__((noinline, noclone)) void
fn1 (int x)
{
  __asm volatile ("" : "+r" (x) : : "memory");
}

static int
fn2 (int x, int y, int z)
{
  int a = 8;
  if (x != z)
    {
      fn1 (x);
      fn1 (x);		/* { dg-final { gdb-test 20 "x" "36" } } */
      if (x == 36)	/* { dg-final { gdb-test 20 "y" "25" } } */
	fn1 (x);	/* { dg-final { gdb-test 20 "z" "6" } } */
      fn1 (x);		/* { dg-final { gdb-test 23 "x" "98" } } */
      if (x == 98)	/* { dg-final { gdb-test 23 "y" "117" } } */
	fn1 (x);	/* { dg-final { gdb-test 23 "z" "8" } } */
      fn1 (x);
      fn1 (x + a);
    }
  return y;
}

__attribute__((noinline, noclone)) int
fn3 (int x, int y)
{
  return fn2 (x, y, 6);
}

__attribute__((noinline, noclone)) int
fn4 (int x, int y)
{
  return fn2 (x, y, 8);
}

int
main ()
{
  fn3 (36, 25);
  fn4 (98, 117);
  return 0;
}

Comments

Jan Hubicka Dec. 17, 2015, 9 p.m. UTC | #1
Jakub,
thanks a lot for looking into this! I am now bit on tight schedule moving back
to Prague and I knew little about the implementation of debug info for
optimized out arguments.
> 
> Ok, so here is a WIP patch changing the functions you wanted, untested so
> far.
> 
> I've been looking at 3 testcases (attached), -1.c and -3.c with -g -O2,
> and -2.c with -g -O3.
> The -3.c one is a copy of the test we have for the ipa-split debug info
> stuff, before/after the patch we generate the same stuff.
> -2.c testcase is for the (new or now much more often taken patch) of

The path is not really new or much more taken :) We used to do 50k clones on
Firefox, now we do 11k clones.  It is just taken on different testcases
than before... So good news is that solving this should improve debug info
in quite few cases.

> ipa-cp, the patch arranges for proper debug info in that case
> (but, I'm really surprised why when the function is already cloned, nothing
> figures out that the clone is always called with the same constant
> passed to the arg8 and the argument isn't removed and replaced by constant.

I will take a look. Perhaps Martin will know.

> -1.c is a testcase for the IPA-SRA path, where we unfortunately end up with
> -a slight regression (on the IL size, in the end we generate the same
> assembly):
> +  # DEBUG D#8 s=> arg8
> +  # DEBUG arg8 => D#8
>    # DEBUG arg8 => 7
> with the patch.  On that testcase, arg8 is used, but it is always passed
> value 7 (similarly to -2.c testcase) and in that case we really don't
> need/want the decl_debug_args stuff, it is unnecessary, it is enough to say
> in the callee that arg8 is 7.  Nothing on the caller side sets the magic
> corresponding D# debug expr decl anyway.
> Either tree_versioning is too low-level for the debug info addition, or
> we need to figure out how to tell it if a constant will be always passed
> to some argument and what that constant will be, so that we'd emit

tree_versioning has the info for that.  It obtains args_to_skip for arguments
that should be skipped and also tree_map which tells for those argument
that are skipped (removed) what they should be replaced for.  For those we
substituted by a constant, we ge the constant there.  

Note that there is also code to replace aggrgates by constants that bypasses
tree_versioning but I donot think we remove the aggregate arguments after 
propagating the constant in (we should).

I wonder if you tried to trigger a cascaded clonning.  For example:
struct a {int a;int b;};

static int reta (struct a a, int unused)
{
  return a.a;
}
main()
{
  struct a a={1,1};
  int v = reta(a,1);
  struct a a2={1,1};
  v += reta(a2,2);
  return v;
}

will first produce isra clone and then constprop clone and finally turn it to inline clone.

The changed to cgraph and tree-inline makes sense to me.

Honza
> +      /* For optimized away parameters, add on the caller side
> +	 before the call
> +	 DEBUG D#X => parm_Y(D)
> +	 stmts and associate D#X with parm in decl_debug_args_lookup
> +	 vector to say for debug info that if parameter parm had been passed,
> +	 it would have value parm_Y(D).  */
> +      if (e->callee->clone.combined_args_to_skip && MAY_HAVE_DEBUG_STMTS)
> +	{
> +	  vec<tree, va_gc> **debug_args
> +	    = decl_debug_args_lookup (e->callee->decl);
> +	  if (debug_args)
> +	    {
> +	      tree parm;
> +	      unsigned i = 0, num;
> +	      unsigned len = vec_safe_length (*debug_args);
> +	      for (parm = DECL_ARGUMENTS (decl), num = 0;
> +		   parm; parm = DECL_CHAIN (parm), num++)
> +		if (bitmap_bit_p (e->callee->clone.combined_args_to_skip, num)
> +		    && is_gimple_reg (parm))
> +		  {
> +		    gimple *def_temp;
> +		    unsigned last = i;
> +
> +		    while (i < len && (**debug_args)[i] != DECL_ORIGIN (parm))
> +		      i += 2;
> +		    if (i >= len)
> +		      {
> +			i = 0;
> +			while (i < last && (**debug_args)[i]
> +			       != DECL_ORIGIN (parm))
> +			  i += 2;
> +			if (i >= last)
> +			  continue;
> +		      }
> +		    tree ddecl = (**debug_args)[i + 1];
> +		    tree arg = gimple_call_arg (e->call_stmt, num);
> +		    def_temp
> +		      = gimple_build_debug_bind (ddecl, unshare_expr (arg),
> +						 e->call_stmt);
> +		    gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
> +		  }
> +	    }
> +	}
> +
>        gsi_replace (&gsi, new_stmt, false);
>        /* We need to defer cleaning EH info on the new statement to
>           fixup-cfg.  We may not have dominator information at this point
> --- gcc/tree-inline.c.jj	2015-12-10 16:56:26.000000000 +0100
> +++ gcc/tree-inline.c	2015-12-17 18:56:18.667166746 +0100
> @@ -5740,9 +5740,8 @@ tree_function_versioning (tree old_decl,
>    /* Copy the function's static chain.  */
>    p = DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl;
>    if (p)
> -    DECL_STRUCT_FUNCTION (new_decl)->static_chain_decl =
> -      copy_static_chain (DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl,
> -			 &id);
> +    DECL_STRUCT_FUNCTION (new_decl)->static_chain_decl
> +      = copy_static_chain (p, &id);
>  
>    /* If there's a tree_map, prepare for substitution.  */
>    if (tree_map)
> @@ -5797,9 +5796,9 @@ tree_function_versioning (tree old_decl,
>        }
>    /* Copy the function's arguments.  */
>    if (DECL_ARGUMENTS (old_decl) != NULL_TREE)
> -    DECL_ARGUMENTS (new_decl) =
> -      copy_arguments_for_versioning (DECL_ARGUMENTS (old_decl), &id,
> -      				     args_to_skip, &vars);
> +    DECL_ARGUMENTS (new_decl)
> +      = copy_arguments_for_versioning (DECL_ARGUMENTS (old_decl), &id,
> +				       args_to_skip, &vars);
>  
>    DECL_INITIAL (new_decl) = remap_blocks (DECL_INITIAL (id.src_fn), &id);
>    BLOCK_SUPERCONTEXT (DECL_INITIAL (new_decl)) = new_decl;
> @@ -5914,6 +5913,67 @@ tree_function_versioning (tree old_decl,
>  	}
>      }
>  
> +  if (args_to_skip && MAY_HAVE_DEBUG_STMTS)
> +    {
> +      tree parm;
> +      vec<tree, va_gc> **debug_args = NULL;
> +      unsigned int len = 0;
> +      for (parm = DECL_ARGUMENTS (old_decl), i = 0;
> +	   parm; parm = DECL_CHAIN (parm), i++)
> +	if (bitmap_bit_p (args_to_skip, i) && is_gimple_reg (parm))
> +	  {
> +	    tree ddecl;
> +
> +	    if (debug_args == NULL)
> +	      {
> +		debug_args = decl_debug_args_insert (new_decl);
> +		len = vec_safe_length (*debug_args);
> +	      }
> +	    ddecl = make_node (DEBUG_EXPR_DECL);
> +	    DECL_ARTIFICIAL (ddecl) = 1;
> +	    TREE_TYPE (ddecl) = TREE_TYPE (parm);
> +	    DECL_MODE (ddecl) = DECL_MODE (parm);
> +	    vec_safe_push (*debug_args, DECL_ORIGIN (parm));
> +	    vec_safe_push (*debug_args, ddecl);
> +	  }
> +      if (debug_args != NULL)
> +	{
> +	  /* On the callee side, add
> +	     DEBUG D#Y s=> parm
> +	     DEBUG var => D#Y
> +	     stmts to the first bb where var is a VAR_DECL created for the
> +	     optimized away parameter in DECL_INITIAL block.  This hints
> +	     in the debug info that var (whole DECL_ORIGIN is the parm
> +	     PARM_DECL) is optimized away, but could be looked up at the
> +	     call site as value of D#X there.  */
> +	  tree var = vars, vexpr;
> +	  gimple_stmt_iterator cgsi
> +	    = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
> +	  gimple *def_temp;
> +	  var = vars;
> +	  i = vec_safe_length (*debug_args);
> +	  do
> +	    {
> +	      i -= 2;
> +	      while (var != NULL_TREE
> +		     && DECL_ABSTRACT_ORIGIN (var) != (**debug_args)[i])
> +		var = TREE_CHAIN (var);
> +	      if (var == NULL_TREE)
> +		break;
> +	      vexpr = make_node (DEBUG_EXPR_DECL);
> +	      parm = (**debug_args)[i];
> +	      DECL_ARTIFICIAL (vexpr) = 1;
> +	      TREE_TYPE (vexpr) = TREE_TYPE (parm);
> +	      DECL_MODE (vexpr) = DECL_MODE (parm);
> +	      def_temp = gimple_build_debug_bind (var, vexpr, NULL);
> +	      gsi_insert_before (&cgsi, def_temp, GSI_NEW_STMT);
> +	      def_temp = gimple_build_debug_source_bind (vexpr, parm, NULL);
> +	      gsi_insert_before (&cgsi, def_temp, GSI_NEW_STMT);
> +	    }
> +	  while (i > len);
> +	}
> +    }
> +
>    free_dominance_info (CDI_DOMINATORS);
>    free_dominance_info (CDI_POST_DOMINATORS);
>  
> 
> 
> 	Jakub

> /* PR debug/36728 */
> /* { dg-do run } */
> /* { dg-options "-g" } */
> 
> #define NOP "nop"
> 
> static int __attribute__((noinline))
> foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int arg8)
> {
>   char *x = __builtin_alloca (arg7);
>   int __attribute__ ((aligned(32))) y;
> 
>   y = 2;
>   asm (NOP : "=m" (y) : "m" (y));
>   x[0] = 25 + arg8;
>   asm volatile (NOP : "=m" (x[0]) : "m" (x[0]));
>   return y;
> }
> 
> /* On s390(x) r2 and r3 are (depending on the optimization level) used
>    when adjusting the addresses in order to meet the alignment
>    requirements above.  They usually hold the function arguments arg1
>    and arg2.  So it is expected that these values are unavailable in
>    some of these tests.  */
> 
> /* { dg-final { gdb-test 14 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 14 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 14 "arg3" "3" } } */
> /* { dg-final { gdb-test 14 "arg4" "4" } } */
> /* { dg-final { gdb-test 14 "arg5" "5" } } */
> /* { dg-final { gdb-test 14 "arg6" "6" } } */
> /* { dg-final { gdb-test 14 "arg7" "30" } } */
> /* { dg-final { gdb-test 14 "y" "2" } } */
> /* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 16 "arg3" "3" } } */
> /* { dg-final { gdb-test 16 "arg4" "4" } } */
> /* { dg-final { gdb-test 16 "arg5" "5" } } */
> /* { dg-final { gdb-test 16 "arg6" "6" } } */
> /* { dg-final { gdb-test 16 "arg7" "30" } } */
> /* { dg-final { gdb-test 16 "*x" "(char) 25" } } */
> /* { dg-final { gdb-test 16 "y" "2" } } */
> 
> int q;
> 
> int
> main ()
> {
>   int l = 0;
>   asm ("" : "=r" (l) : "0" (l));
>   foo (l + 1, l + 2, l + 3, l + 4, l + 5, l + 6, l + 30, 7);
>   asm volatile ("" :: "r" (l));
>   return 0;
> }

> /* PR debug/36728 */
> /* { dg-do run } */
> /* { dg-options "-g" } */
> 
> #define NOP "nop"
> 
> int __attribute__((noinline))
> foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int arg8)
> {
>   char *x = __builtin_alloca (arg7);
>   int __attribute__ ((aligned(32))) y;
> 
>   y = 2;
>   asm (NOP : "=m" (y) : "m" (y));
>   x[0] = 25 + arg8;
>   asm volatile (NOP : "=m" (x[0]) : "m" (x[0]));
>   return y;
> }
> 
> /* On s390(x) r2 and r3 are (depending on the optimization level) used
>    when adjusting the addresses in order to meet the alignment
>    requirements above.  They usually hold the function arguments arg1
>    and arg2.  So it is expected that these values are unavailable in
>    some of these tests.  */
> 
> /* { dg-final { gdb-test 14 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 14 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 14 "arg3" "3" } } */
> /* { dg-final { gdb-test 14 "arg4" "4" } } */
> /* { dg-final { gdb-test 14 "arg5" "5" } } */
> /* { dg-final { gdb-test 14 "arg6" "6" } } */
> /* { dg-final { gdb-test 14 "arg7" "30" } } */
> /* { dg-final { gdb-test 14 "y" "2" } } */
> /* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } }*/
> /* { dg-final { gdb-test 16 "arg3" "3" } } */
> /* { dg-final { gdb-test 16 "arg4" "4" } } */
> /* { dg-final { gdb-test 16 "arg5" "5" } } */
> /* { dg-final { gdb-test 16 "arg6" "6" } } */
> /* { dg-final { gdb-test 16 "arg7" "30" } } */
> /* { dg-final { gdb-test 16 "*x" "(char) 25" } } */
> /* { dg-final { gdb-test 16 "y" "2" } } */
> 
> int q;
> 
> int
> main ()
> {
>   int l = 0;
>   asm ("" : "=r" (l) : "0" (l));
>   foo (l + 1, l + 2, l + 3, l + 4, l + 5, l + 6, l + 30, 7);
>   asm volatile ("" :: "r" (l));
>   return 0;
> }

> /* PR debug/54519 */
> /* { dg-do run } */
> /* { dg-options "-g" } */
> 
> __attribute__((noinline, noclone)) void
> fn1 (int x)
> {
>   __asm volatile ("" : "+r" (x) : : "memory");
> }
> 
> static int
> fn2 (int x, int y, int z)
> {
>   int a = 8;
>   if (x != z)
>     {
>       fn1 (x);
>       fn1 (x);		/* { dg-final { gdb-test 20 "x" "36" } } */
>       if (x == 36)	/* { dg-final { gdb-test 20 "y" "25" } } */
> 	fn1 (x);	/* { dg-final { gdb-test 20 "z" "6" } } */
>       fn1 (x);		/* { dg-final { gdb-test 23 "x" "98" } } */
>       if (x == 98)	/* { dg-final { gdb-test 23 "y" "117" } } */
> 	fn1 (x);	/* { dg-final { gdb-test 23 "z" "8" } } */
>       fn1 (x);
>       fn1 (x + a);
>     }
>   return y;
> }
> 
> __attribute__((noinline, noclone)) int
> fn3 (int x, int y)
> {
>   return fn2 (x, y, 6);
> }
> 
> __attribute__((noinline, noclone)) int
> fn4 (int x, int y)
> {
>   return fn2 (x, y, 8);
> }
> 
> int
> main ()
> {
>   fn3 (36, 25);
>   fn4 (98, 117);
>   return 0;
> }
Jan Hubicka Dec. 17, 2015, 9:10 p.m. UTC | #2
> Jakub,
> thanks a lot for looking into this! I am now bit on tight schedule moving back
> to Prague and I knew little about the implementation of debug info for
> optimized out arguments.
Hi,
here is better testcase that also trigger splitting
struct a {int a;int b;};

inline
static int reta (struct a a, int unused, int c)
{
  if (__builtin_expect (c,1) != 0)
   {
     return c;
   }
  test();
  test();
  test();
  test();
  test();
  test();
  test();
  test();
  test();
  return a.a;
}
main()
{
  struct a a={1,1};
  int v = reta(a,1,1);
  struct a a2={1,1};
  v += reta(a2,2,1);
  return v;
}

Compile with -fno-early-inlining

Honza
diff mbox

Patch

--- gcc/ipa-split.c.jj	2015-12-10 11:14:00.000000000 +0100
+++ gcc/ipa-split.c	2015-12-17 18:21:39.402036180 +0100
@@ -1209,7 +1209,6 @@  split_function (basic_block return_bb, s
   gimple *last_stmt = NULL;
   unsigned int i;
   tree arg, ddef;
-  vec<tree, va_gc> **debug_args = NULL;
 
   if (dump_file)
     {
@@ -1432,73 +1431,38 @@  split_function (basic_block return_bb, s
      vector to say for debug info that if parameter parm had been passed,
      it would have value parm_Y(D).  */
   if (args_to_skip)
-    for (parm = DECL_ARGUMENTS (current_function_decl), num = 0;
-	 parm; parm = DECL_CHAIN (parm), num++)
-      if (bitmap_bit_p (args_to_skip, num)
-	  && is_gimple_reg (parm))
-	{
-	  tree ddecl;
-	  gimple *def_temp;
-
-	  /* This needs to be done even without MAY_HAVE_DEBUG_STMTS,
-	     otherwise if it didn't exist before, we'd end up with
-	     different SSA_NAME_VERSIONs between -g and -g0.  */
-	  arg = get_or_create_ssa_default_def (cfun, parm);
-	  if (!MAY_HAVE_DEBUG_STMTS)
-	    continue;
-
-	  if (debug_args == NULL)
-	    debug_args = decl_debug_args_insert (node->decl);
-	  ddecl = make_node (DEBUG_EXPR_DECL);
-	  DECL_ARTIFICIAL (ddecl) = 1;
-	  TREE_TYPE (ddecl) = TREE_TYPE (parm);
-	  DECL_MODE (ddecl) = DECL_MODE (parm);
-	  vec_safe_push (*debug_args, DECL_ORIGIN (parm));
-	  vec_safe_push (*debug_args, ddecl);
-	  def_temp = gimple_build_debug_bind (ddecl, unshare_expr (arg),
-					      call);
-	  gsi_insert_after (&gsi, def_temp, GSI_NEW_STMT);
-	}
-  /* And on the callee side, add
-     DEBUG D#Y s=> parm
-     DEBUG var => D#Y
-     stmts to the first bb where var is a VAR_DECL created for the
-     optimized away parameter in DECL_INITIAL block.  This hints
-     in the debug info that var (whole DECL_ORIGIN is the parm PARM_DECL)
-     is optimized away, but could be looked up at the call site
-     as value of D#X there.  */
-  if (debug_args != NULL)
     {
-      unsigned int i;
-      tree var, vexpr;
-      gimple_stmt_iterator cgsi;
-      gimple *def_temp;
-
-      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
-      var = BLOCK_VARS (DECL_INITIAL (node->decl));
-      i = vec_safe_length (*debug_args);
-      cgsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
-      do
+      vec<tree, va_gc> **debug_args = NULL;
+      unsigned i = 0, len = 0;
+      if (MAY_HAVE_DEBUG_STMTS)
 	{
-	  i -= 2;
-	  while (var != NULL_TREE
-		 && DECL_ABSTRACT_ORIGIN (var) != (**debug_args)[i])
-	    var = TREE_CHAIN (var);
-	  if (var == NULL_TREE)
-	    break;
-	  vexpr = make_node (DEBUG_EXPR_DECL);
-	  parm = (**debug_args)[i];
-	  DECL_ARTIFICIAL (vexpr) = 1;
-	  TREE_TYPE (vexpr) = TREE_TYPE (parm);
-	  DECL_MODE (vexpr) = DECL_MODE (parm);
-	  def_temp = gimple_build_debug_source_bind (vexpr, parm,
-						     NULL);
-	  gsi_insert_before (&cgsi, def_temp, GSI_SAME_STMT);
-	  def_temp = gimple_build_debug_bind (var, vexpr, NULL);
-	  gsi_insert_before (&cgsi, def_temp, GSI_SAME_STMT);
+	  debug_args = decl_debug_args_lookup (node->decl);
+	  if (debug_args)
+	    len = vec_safe_length (*debug_args);
 	}
-      while (i);
-      pop_cfun ();
+      for (parm = DECL_ARGUMENTS (current_function_decl), num = 0;
+	   parm; parm = DECL_CHAIN (parm), num++)
+	if (bitmap_bit_p (args_to_skip, num) && is_gimple_reg (parm))
+	  {
+	    tree ddecl;
+	    gimple *def_temp;
+
+	    /* This needs to be done even without MAY_HAVE_DEBUG_STMTS,
+	       otherwise if it didn't exist before, we'd end up with
+	       different SSA_NAME_VERSIONs between -g and -g0.  */
+	    arg = get_or_create_ssa_default_def (cfun, parm);
+	    if (!MAY_HAVE_DEBUG_STMTS || debug_args == NULL)
+	      continue;
+
+	    while (i < len && (**debug_args)[i] != DECL_ORIGIN (parm))
+	      i += 2;
+	    if (i >= len)
+	      continue;
+	    ddecl = (**debug_args)[i + 1];
+	    def_temp
+	      = gimple_build_debug_bind (ddecl, unshare_expr (arg), call);
+	    gsi_insert_after (&gsi, def_temp, GSI_NEW_STMT);
+	  }
     }
 
   /* We avoid address being taken on any variable used by split part,
--- gcc/cgraph.c.jj	2015-12-16 09:02:11.000000000 +0100
+++ gcc/cgraph.c	2015-12-17 18:34:50.609062057 +0100
@@ -58,6 +58,7 @@  along with GCC; see the file COPYING3.
 #include "params.h"
 #include "tree-chkp.h"
 #include "context.h"
+#include "gimplify.h"
 
 /* FIXME: Only for PROP_loops, but cgraph shouldn't have to know about this.  */
 #include "tree-pass.h"
@@ -1420,6 +1421,51 @@  cgraph_edge::redirect_call_stmt_to_calle
 	SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt;
 
       gsi = gsi_for_stmt (e->call_stmt);
+
+      /* For optimized away parameters, add on the caller side
+	 before the call
+	 DEBUG D#X => parm_Y(D)
+	 stmts and associate D#X with parm in decl_debug_args_lookup
+	 vector to say for debug info that if parameter parm had been passed,
+	 it would have value parm_Y(D).  */
+      if (e->callee->clone.combined_args_to_skip && MAY_HAVE_DEBUG_STMTS)
+	{
+	  vec<tree, va_gc> **debug_args
+	    = decl_debug_args_lookup (e->callee->decl);
+	  if (debug_args)
+	    {
+	      tree parm;
+	      unsigned i = 0, num;
+	      unsigned len = vec_safe_length (*debug_args);
+	      for (parm = DECL_ARGUMENTS (decl), num = 0;
+		   parm; parm = DECL_CHAIN (parm), num++)
+		if (bitmap_bit_p (e->callee->clone.combined_args_to_skip, num)
+		    && is_gimple_reg (parm))
+		  {
+		    gimple *def_temp;
+		    unsigned last = i;
+
+		    while (i < len && (**debug_args)[i] != DECL_ORIGIN (parm))
+		      i += 2;
+		    if (i >= len)
+		      {
+			i = 0;
+			while (i < last && (**debug_args)[i]
+			       != DECL_ORIGIN (parm))
+			  i += 2;
+			if (i >= last)
+			  continue;
+		      }
+		    tree ddecl = (**debug_args)[i + 1];
+		    tree arg = gimple_call_arg (e->call_stmt, num);
+		    def_temp
+		      = gimple_build_debug_bind (ddecl, unshare_expr (arg),
+						 e->call_stmt);
+		    gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
+		  }
+	    }
+	}
+
       gsi_replace (&gsi, new_stmt, false);
       /* We need to defer cleaning EH info on the new statement to
          fixup-cfg.  We may not have dominator information at this point
--- gcc/tree-inline.c.jj	2015-12-10 16:56:26.000000000 +0100
+++ gcc/tree-inline.c	2015-12-17 18:56:18.667166746 +0100
@@ -5740,9 +5740,8 @@  tree_function_versioning (tree old_decl,
   /* Copy the function's static chain.  */
   p = DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl;
   if (p)
-    DECL_STRUCT_FUNCTION (new_decl)->static_chain_decl =
-      copy_static_chain (DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl,
-			 &id);
+    DECL_STRUCT_FUNCTION (new_decl)->static_chain_decl
+      = copy_static_chain (p, &id);
 
   /* If there's a tree_map, prepare for substitution.  */
   if (tree_map)
@@ -5797,9 +5796,9 @@  tree_function_versioning (tree old_decl,
       }
   /* Copy the function's arguments.  */
   if (DECL_ARGUMENTS (old_decl) != NULL_TREE)
-    DECL_ARGUMENTS (new_decl) =
-      copy_arguments_for_versioning (DECL_ARGUMENTS (old_decl), &id,
-      				     args_to_skip, &vars);
+    DECL_ARGUMENTS (new_decl)
+      = copy_arguments_for_versioning (DECL_ARGUMENTS (old_decl), &id,
+				       args_to_skip, &vars);
 
   DECL_INITIAL (new_decl) = remap_blocks (DECL_INITIAL (id.src_fn), &id);
   BLOCK_SUPERCONTEXT (DECL_INITIAL (new_decl)) = new_decl;
@@ -5914,6 +5913,67 @@  tree_function_versioning (tree old_decl,
 	}
     }
 
+  if (args_to_skip && MAY_HAVE_DEBUG_STMTS)
+    {
+      tree parm;
+      vec<tree, va_gc> **debug_args = NULL;
+      unsigned int len = 0;
+      for (parm = DECL_ARGUMENTS (old_decl), i = 0;
+	   parm; parm = DECL_CHAIN (parm), i++)
+	if (bitmap_bit_p (args_to_skip, i) && is_gimple_reg (parm))
+	  {
+	    tree ddecl;
+
+	    if (debug_args == NULL)
+	      {
+		debug_args = decl_debug_args_insert (new_decl);
+		len = vec_safe_length (*debug_args);
+	      }
+	    ddecl = make_node (DEBUG_EXPR_DECL);
+	    DECL_ARTIFICIAL (ddecl) = 1;
+	    TREE_TYPE (ddecl) = TREE_TYPE (parm);
+	    DECL_MODE (ddecl) = DECL_MODE (parm);
+	    vec_safe_push (*debug_args, DECL_ORIGIN (parm));
+	    vec_safe_push (*debug_args, ddecl);
+	  }
+      if (debug_args != NULL)
+	{
+	  /* On the callee side, add
+	     DEBUG D#Y s=> parm
+	     DEBUG var => D#Y
+	     stmts to the first bb where var is a VAR_DECL created for the
+	     optimized away parameter in DECL_INITIAL block.  This hints
+	     in the debug info that var (whole DECL_ORIGIN is the parm
+	     PARM_DECL) is optimized away, but could be looked up at the
+	     call site as value of D#X there.  */
+	  tree var = vars, vexpr;
+	  gimple_stmt_iterator cgsi
+	    = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+	  gimple *def_temp;
+	  var = vars;
+	  i = vec_safe_length (*debug_args);
+	  do
+	    {
+	      i -= 2;
+	      while (var != NULL_TREE
+		     && DECL_ABSTRACT_ORIGIN (var) != (**debug_args)[i])
+		var = TREE_CHAIN (var);
+	      if (var == NULL_TREE)
+		break;
+	      vexpr = make_node (DEBUG_EXPR_DECL);
+	      parm = (**debug_args)[i];
+	      DECL_ARTIFICIAL (vexpr) = 1;
+	      TREE_TYPE (vexpr) = TREE_TYPE (parm);
+	      DECL_MODE (vexpr) = DECL_MODE (parm);
+	      def_temp = gimple_build_debug_bind (var, vexpr, NULL);
+	      gsi_insert_before (&cgsi, def_temp, GSI_NEW_STMT);
+	      def_temp = gimple_build_debug_source_bind (vexpr, parm, NULL);
+	      gsi_insert_before (&cgsi, def_temp, GSI_NEW_STMT);
+	    }
+	  while (i > len);
+	}
+    }
+
   free_dominance_info (CDI_DOMINATORS);
   free_dominance_info (CDI_POST_DOMINATORS);