diff mbox

Do framep replacement even on RHS outside of MEM contexts (PR debug/44694)

Message ID 20100629143144.GM25077@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek June 29, 2010, 2:31 p.m. UTC
Hi!

On the ginac.ii testcase cc1plus spends huge amount of time in var-tracking.
The problem is that there are many variables with values sp + const_int
(e.g. this pointers for huge amount of methods) and code to handle
reversible ops results in very long loc_chain lists (up to 3740 entries),
where the sp value is equvalenced with (plus (some_other_value) (const_int N))
for many different values (and corresponding offsets).

Fixed by canonicalizing sp (resp. hardfp) to (framep) + offset
even outside of MEM addresses when on the RHS and not doing
reversible ops for framep - which doesn't buy us anything, framep is always
computable using DW_OP_fbreg anywhere in the function.

The speedup for ginac.ii -g -O2 is from over 3 minutes to 16 seconds.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2010-06-29  Jakub Jelinek  <jakub@redhat.com>

	PR debug/44694
	* cselib.h (cselib_preserve_cfa_base_value): Add regno argument.
	* cselib.c (cfa_base_preserved_regno): New static variable.
	(cselib_reset_table): Don't reset cfa_base_preserved_regno instead
	of REGNO (cfa_base_preserved_val->locs->loc).
	(cselib_preserve_cfa_base_value): Add regno argument, set
	cfa_base_preserved_regno to it.
	(cselib_invalidate_regno): Allow removal of registers other than
	cfa_base_preserved_regno from cfa_base_preserved_val.
	(cselib_finish): Set cfa_base_preserved_regno to INVALID_REGNUM.
	* var-tracking.c (adjust_mems): Replace sp or hfp even outside
	of MEM addresses, if not on LHS.
	(reverse_op): Don't add reverse ops for cfa_base_rtx.
	(vt_init_cfa_base): Adjust cselib_preserve_cfa_base_value caller.


	Jakub

Comments

Richard Biener June 29, 2010, 2:33 p.m. UTC | #1
On Tue, 29 Jun 2010, Jakub Jelinek wrote:

> Hi!
> 
> On the ginac.ii testcase cc1plus spends huge amount of time in var-tracking.
> The problem is that there are many variables with values sp + const_int
> (e.g. this pointers for huge amount of methods) and code to handle
> reversible ops results in very long loc_chain lists (up to 3740 entries),
> where the sp value is equvalenced with (plus (some_other_value) (const_int N))
> for many different values (and corresponding offsets).
> 
> Fixed by canonicalizing sp (resp. hardfp) to (framep) + offset
> even outside of MEM addresses when on the RHS and not doing
> reversible ops for framep - which doesn't buy us anything, framep is always
> computable using DW_OP_fbreg anywhere in the function.
> 
> The speedup for ginac.ii -g -O2 is from over 3 minutes to 16 seconds.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.  Does this also apply to the branch?

Thanks,
Richard.

> 2010-06-29  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR debug/44694
> 	* cselib.h (cselib_preserve_cfa_base_value): Add regno argument.
> 	* cselib.c (cfa_base_preserved_regno): New static variable.
> 	(cselib_reset_table): Don't reset cfa_base_preserved_regno instead
> 	of REGNO (cfa_base_preserved_val->locs->loc).
> 	(cselib_preserve_cfa_base_value): Add regno argument, set
> 	cfa_base_preserved_regno to it.
> 	(cselib_invalidate_regno): Allow removal of registers other than
> 	cfa_base_preserved_regno from cfa_base_preserved_val.
> 	(cselib_finish): Set cfa_base_preserved_regno to INVALID_REGNUM.
> 	* var-tracking.c (adjust_mems): Replace sp or hfp even outside
> 	of MEM addresses, if not on LHS.
> 	(reverse_op): Don't add reverse ops for cfa_base_rtx.
> 	(vt_init_cfa_base): Adjust cselib_preserve_cfa_base_value caller.
> 
> --- gcc/cselib.h.jj	2010-03-31 13:12:00.000000000 +0200
> +++ gcc/cselib.h	2010-06-29 13:45:37.000000000 +0200
> @@ -99,6 +99,6 @@ extern unsigned int cselib_get_next_uid 
>  extern void cselib_preserve_value (cselib_val *);
>  extern bool cselib_preserved_value_p (cselib_val *);
>  extern void cselib_preserve_only_values (void);
> -extern void cselib_preserve_cfa_base_value (cselib_val *);
> +extern void cselib_preserve_cfa_base_value (cselib_val *, unsigned int);
>  
>  extern void dump_cselib_table (FILE *);
> --- gcc/cselib.c.jj	2010-05-25 11:27:47.000000000 +0200
> +++ gcc/cselib.c	2010-06-29 14:05:59.000000000 +0200
> @@ -178,6 +178,7 @@ static cselib_val dummy_val;
>     that is constant through the whole function and should never be
>     eliminated.  */
>  static cselib_val *cfa_base_preserved_val;
> +static unsigned int cfa_base_preserved_regno;
>  
>  /* Used to list all values that contain memory reference.
>     May or may not contain the useless values - the list is compacted
> @@ -338,7 +339,7 @@ cselib_reset_table (unsigned int num)
>  
>    if (cfa_base_preserved_val)
>      {
> -      unsigned int regno = REGNO (cfa_base_preserved_val->locs->loc);
> +      unsigned int regno = cfa_base_preserved_regno;
>        unsigned int new_used_regs = 0;
>        for (i = 0; i < n_used_regs; i++)
>  	if (used_regs[i] == regno)
> @@ -571,12 +572,15 @@ cselib_preserved_value_p (cselib_val *v)
>     never invalidated and preserved across cselib_reset_table calls.  */
>  
>  void
> -cselib_preserve_cfa_base_value (cselib_val *v)
> +cselib_preserve_cfa_base_value (cselib_val *v, unsigned int regno)
>  {
>    if (cselib_preserve_constants
>        && v->locs
>        && REG_P (v->locs->loc))
> -    cfa_base_preserved_val = v;
> +    {
> +      cfa_base_preserved_val = v;
> +      cfa_base_preserved_regno = regno;
> +    }
>  }
>  
>  /* Clean all non-constant expressions in the hash table, but retain
> @@ -1783,7 +1787,9 @@ cselib_invalidate_regno (unsigned int re
>  	  if (i < FIRST_PSEUDO_REGISTER && v != NULL)
>  	    this_last = end_hard_regno (GET_MODE (v->val_rtx), i) - 1;
>  
> -	  if (this_last < regno || v == NULL || v == cfa_base_preserved_val)
> +	  if (this_last < regno || v == NULL
> +	      || (v == cfa_base_preserved_val
> +		  && i == cfa_base_preserved_regno))
>  	    {
>  	      l = &(*l)->next;
>  	      continue;
> @@ -2266,6 +2272,7 @@ cselib_finish (void)
>    cselib_discard_hook = NULL;
>    cselib_preserve_constants = false;
>    cfa_base_preserved_val = NULL;
> +  cfa_base_preserved_regno = INVALID_REGNUM;
>    free_alloc_pool (elt_list_pool);
>    free_alloc_pool (elt_loc_list_pool);
>    free_alloc_pool (cselib_val_pool);
> --- gcc/var-tracking.c.jj	2010-06-29 10:33:44.000000000 +0200
> +++ gcc/var-tracking.c	2010-06-29 13:46:07.000000000 +0200
> @@ -798,8 +798,9 @@ adjust_mems (rtx loc, const_rtx old_rtx,
>    switch (GET_CODE (loc))
>      {
>      case REG:
> -      /* Don't do any sp or fp replacements outside of MEM addresses.  */
> -      if (amd->mem_mode == VOIDmode)
> +      /* Don't do any sp or fp replacements outside of MEM addresses
> +         on the LHS.  */
> +      if (amd->mem_mode == VOIDmode && amd->store)
>  	return loc;
>        if (loc == stack_pointer_rtx
>  	  && !frame_pointer_needed)
> @@ -5193,7 +5194,9 @@ reverse_op (rtx val, const_rtx expr)
>        return NULL_RTX;
>      }
>  
> -  if (!REG_P (XEXP (src, 0)) || !SCALAR_INT_MODE_P (GET_MODE (src)))
> +  if (!REG_P (XEXP (src, 0))
> +      || !SCALAR_INT_MODE_P (GET_MODE (src))
> +      || XEXP (src, 0) == cfa_base_rtx)
>      return NULL_RTX;
>  
>    v = cselib_lookup (XEXP (src, 0), GET_MODE (XEXP (src, 0)), 0);
> @@ -8163,7 +8166,7 @@ vt_init_cfa_base (void)
>    val = cselib_lookup_from_insn (cfa_base_rtx, GET_MODE (cfa_base_rtx), 1,
>  				 get_insns ());
>    preserve_value (val);
> -  cselib_preserve_cfa_base_value (val);
> +  cselib_preserve_cfa_base_value (val, REGNO (cfa_base_rtx));
>    var_reg_decl_set (&VTI (ENTRY_BLOCK_PTR)->out, cfa_base_rtx,
>  		    VAR_INIT_STATUS_INITIALIZED, dv_from_value (val->val_rtx),
>  		    0, NULL_RTX, INSERT);
> 
> 	Jakub
> 
>
Jakub Jelinek June 29, 2010, 2:35 p.m. UTC | #2
On Tue, Jun 29, 2010 at 04:33:26PM +0200, Richard Guenther wrote:
> On Tue, 29 Jun 2010, Jakub Jelinek wrote:
> > On the ginac.ii testcase cc1plus spends huge amount of time in var-tracking.
> > The problem is that there are many variables with values sp + const_int
> > (e.g. this pointers for huge amount of methods) and code to handle
> > reversible ops results in very long loc_chain lists (up to 3740 entries),
> > where the sp value is equvalenced with (plus (some_other_value) (const_int N))
> > for many different values (and corresponding offsets).
> > 
> > Fixed by canonicalizing sp (resp. hardfp) to (framep) + offset
> > even outside of MEM addresses when on the RHS and not doing
> > reversible ops for framep - which doesn't buy us anything, framep is always
> > computable using DW_OP_fbreg anywhere in the function.
> > 
> > The speedup for ginac.ii -g -O2 is from over 3 minutes to 16 seconds.
> > 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> Ok.  Does this also apply to the branch?

I guess so, I'd just wait for a few days before backporting it.

	Jakub
Jie Zhang July 1, 2010, 8:13 a.m. UTC | #3
On 06/29/2010 10:31 PM, Jakub Jelinek wrote:
> Hi!
>
> On the ginac.ii testcase cc1plus spends huge amount of time in var-tracking.
> The problem is that there are many variables with values sp + const_int
> (e.g. this pointers for huge amount of methods) and code to handle
> reversible ops results in very long loc_chain lists (up to 3740 entries),
> where the sp value is equvalenced with (plus (some_other_value) (const_int N))
> for many different values (and corresponding offsets).
>
> Fixed by canonicalizing sp (resp. hardfp) to (framep) + offset
> even outside of MEM addresses when on the RHS and not doing
> reversible ops for framep - which doesn't buy us anything, framep is always
> computable using DW_OP_fbreg anywhere in the function.
>
> The speedup for ginac.ii -g -O2 is from over 3 minutes to 16 seconds.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2010-06-29  Jakub Jelinek<jakub@redhat.com>
>
> 	PR debug/44694
> 	* cselib.h (cselib_preserve_cfa_base_value): Add regno argument.
> 	* cselib.c (cfa_base_preserved_regno): New static variable.
> 	(cselib_reset_table): Don't reset cfa_base_preserved_regno instead
> 	of REGNO (cfa_base_preserved_val->locs->loc).
> 	(cselib_preserve_cfa_base_value): Add regno argument, set
> 	cfa_base_preserved_regno to it.
> 	(cselib_invalidate_regno): Allow removal of registers other than
> 	cfa_base_preserved_regno from cfa_base_preserved_val.
> 	(cselib_finish): Set cfa_base_preserved_regno to INVALID_REGNUM.
> 	* var-tracking.c (adjust_mems): Replace sp or hfp even outside
> 	of MEM addresses, if not on LHS.
> 	(reverse_op): Don't add reverse ops for cfa_base_rtx.
> 	(vt_init_cfa_base): Adjust cselib_preserve_cfa_base_value caller.
>

This patch caused ICE for arm-none-eabi target:

internal compiler error: in arm_dbx_register_number, at 
config/arm/arm.c:21155

when compiling thumb multilib libstdc++.

> --- gcc/var-tracking.c.jj	2010-06-29 10:33:44.000000000 +0200
> +++ gcc/var-tracking.c	2010-06-29 13:46:07.000000000 +0200
> @@ -798,8 +798,9 @@ adjust_mems (rtx loc, const_rtx old_rtx,
>     switch (GET_CODE (loc))
>       {
>       case REG:
> -      /* Don't do any sp or fp replacements outside of MEM addresses.  */
> -      if (amd->mem_mode == VOIDmode)
> +      /* Don't do any sp or fp replacements outside of MEM addresses
> +         on the LHS.  */
> +      if (amd->mem_mode == VOIDmode&&  amd->store)
>   	return loc;
>         if (loc == stack_pointer_rtx
>   	&&  !frame_pointer_needed)

If I revert this change, the ICE is gone.

The ICE occurs when arm_dbx_register_number receives ARG_POINTER_REGNUM 
but it doesn't handle it since it isn't a hardware register.

x86 has no such problem because DBX_REGISTER_NUMBER returns -1 for non 
hardware register.


Regards,
diff mbox

Patch

--- gcc/cselib.h.jj	2010-03-31 13:12:00.000000000 +0200
+++ gcc/cselib.h	2010-06-29 13:45:37.000000000 +0200
@@ -99,6 +99,6 @@  extern unsigned int cselib_get_next_uid 
 extern void cselib_preserve_value (cselib_val *);
 extern bool cselib_preserved_value_p (cselib_val *);
 extern void cselib_preserve_only_values (void);
-extern void cselib_preserve_cfa_base_value (cselib_val *);
+extern void cselib_preserve_cfa_base_value (cselib_val *, unsigned int);
 
 extern void dump_cselib_table (FILE *);
--- gcc/cselib.c.jj	2010-05-25 11:27:47.000000000 +0200
+++ gcc/cselib.c	2010-06-29 14:05:59.000000000 +0200
@@ -178,6 +178,7 @@  static cselib_val dummy_val;
    that is constant through the whole function and should never be
    eliminated.  */
 static cselib_val *cfa_base_preserved_val;
+static unsigned int cfa_base_preserved_regno;
 
 /* Used to list all values that contain memory reference.
    May or may not contain the useless values - the list is compacted
@@ -338,7 +339,7 @@  cselib_reset_table (unsigned int num)
 
   if (cfa_base_preserved_val)
     {
-      unsigned int regno = REGNO (cfa_base_preserved_val->locs->loc);
+      unsigned int regno = cfa_base_preserved_regno;
       unsigned int new_used_regs = 0;
       for (i = 0; i < n_used_regs; i++)
 	if (used_regs[i] == regno)
@@ -571,12 +572,15 @@  cselib_preserved_value_p (cselib_val *v)
    never invalidated and preserved across cselib_reset_table calls.  */
 
 void
-cselib_preserve_cfa_base_value (cselib_val *v)
+cselib_preserve_cfa_base_value (cselib_val *v, unsigned int regno)
 {
   if (cselib_preserve_constants
       && v->locs
       && REG_P (v->locs->loc))
-    cfa_base_preserved_val = v;
+    {
+      cfa_base_preserved_val = v;
+      cfa_base_preserved_regno = regno;
+    }
 }
 
 /* Clean all non-constant expressions in the hash table, but retain
@@ -1783,7 +1787,9 @@  cselib_invalidate_regno (unsigned int re
 	  if (i < FIRST_PSEUDO_REGISTER && v != NULL)
 	    this_last = end_hard_regno (GET_MODE (v->val_rtx), i) - 1;
 
-	  if (this_last < regno || v == NULL || v == cfa_base_preserved_val)
+	  if (this_last < regno || v == NULL
+	      || (v == cfa_base_preserved_val
+		  && i == cfa_base_preserved_regno))
 	    {
 	      l = &(*l)->next;
 	      continue;
@@ -2266,6 +2272,7 @@  cselib_finish (void)
   cselib_discard_hook = NULL;
   cselib_preserve_constants = false;
   cfa_base_preserved_val = NULL;
+  cfa_base_preserved_regno = INVALID_REGNUM;
   free_alloc_pool (elt_list_pool);
   free_alloc_pool (elt_loc_list_pool);
   free_alloc_pool (cselib_val_pool);
--- gcc/var-tracking.c.jj	2010-06-29 10:33:44.000000000 +0200
+++ gcc/var-tracking.c	2010-06-29 13:46:07.000000000 +0200
@@ -798,8 +798,9 @@  adjust_mems (rtx loc, const_rtx old_rtx,
   switch (GET_CODE (loc))
     {
     case REG:
-      /* Don't do any sp or fp replacements outside of MEM addresses.  */
-      if (amd->mem_mode == VOIDmode)
+      /* Don't do any sp or fp replacements outside of MEM addresses
+         on the LHS.  */
+      if (amd->mem_mode == VOIDmode && amd->store)
 	return loc;
       if (loc == stack_pointer_rtx
 	  && !frame_pointer_needed)
@@ -5193,7 +5194,9 @@  reverse_op (rtx val, const_rtx expr)
       return NULL_RTX;
     }
 
-  if (!REG_P (XEXP (src, 0)) || !SCALAR_INT_MODE_P (GET_MODE (src)))
+  if (!REG_P (XEXP (src, 0))
+      || !SCALAR_INT_MODE_P (GET_MODE (src))
+      || XEXP (src, 0) == cfa_base_rtx)
     return NULL_RTX;
 
   v = cselib_lookup (XEXP (src, 0), GET_MODE (XEXP (src, 0)), 0);
@@ -8163,7 +8166,7 @@  vt_init_cfa_base (void)
   val = cselib_lookup_from_insn (cfa_base_rtx, GET_MODE (cfa_base_rtx), 1,
 				 get_insns ());
   preserve_value (val);
-  cselib_preserve_cfa_base_value (val);
+  cselib_preserve_cfa_base_value (val, REGNO (cfa_base_rtx));
   var_reg_decl_set (&VTI (ENTRY_BLOCK_PTR)->out, cfa_base_rtx,
 		    VAR_INIT_STATUS_INITIALIZED, dv_from_value (val->val_rtx),
 		    0, NULL_RTX, INSERT);