Message ID | 20100629143144.GM25077@tyan-ft48-01.lab.bos.redhat.com |
---|---|
State | New |
Headers | show |
On Tue, 29 Jun 2010, Jakub Jelinek wrote: > Hi! > > On the ginac.ii testcase cc1plus spends huge amount of time in var-tracking. > The problem is that there are many variables with values sp + const_int > (e.g. this pointers for huge amount of methods) and code to handle > reversible ops results in very long loc_chain lists (up to 3740 entries), > where the sp value is equvalenced with (plus (some_other_value) (const_int N)) > for many different values (and corresponding offsets). > > Fixed by canonicalizing sp (resp. hardfp) to (framep) + offset > even outside of MEM addresses when on the RHS and not doing > reversible ops for framep - which doesn't buy us anything, framep is always > computable using DW_OP_fbreg anywhere in the function. > > The speedup for ginac.ii -g -O2 is from over 3 minutes to 16 seconds. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Ok. Does this also apply to the branch? Thanks, Richard. > 2010-06-29 Jakub Jelinek <jakub@redhat.com> > > PR debug/44694 > * cselib.h (cselib_preserve_cfa_base_value): Add regno argument. > * cselib.c (cfa_base_preserved_regno): New static variable. > (cselib_reset_table): Don't reset cfa_base_preserved_regno instead > of REGNO (cfa_base_preserved_val->locs->loc). > (cselib_preserve_cfa_base_value): Add regno argument, set > cfa_base_preserved_regno to it. > (cselib_invalidate_regno): Allow removal of registers other than > cfa_base_preserved_regno from cfa_base_preserved_val. > (cselib_finish): Set cfa_base_preserved_regno to INVALID_REGNUM. > * var-tracking.c (adjust_mems): Replace sp or hfp even outside > of MEM addresses, if not on LHS. > (reverse_op): Don't add reverse ops for cfa_base_rtx. > (vt_init_cfa_base): Adjust cselib_preserve_cfa_base_value caller. > > --- gcc/cselib.h.jj 2010-03-31 13:12:00.000000000 +0200 > +++ gcc/cselib.h 2010-06-29 13:45:37.000000000 +0200 > @@ -99,6 +99,6 @@ extern unsigned int cselib_get_next_uid > extern void cselib_preserve_value (cselib_val *); > extern bool cselib_preserved_value_p (cselib_val *); > extern void cselib_preserve_only_values (void); > -extern void cselib_preserve_cfa_base_value (cselib_val *); > +extern void cselib_preserve_cfa_base_value (cselib_val *, unsigned int); > > extern void dump_cselib_table (FILE *); > --- gcc/cselib.c.jj 2010-05-25 11:27:47.000000000 +0200 > +++ gcc/cselib.c 2010-06-29 14:05:59.000000000 +0200 > @@ -178,6 +178,7 @@ static cselib_val dummy_val; > that is constant through the whole function and should never be > eliminated. */ > static cselib_val *cfa_base_preserved_val; > +static unsigned int cfa_base_preserved_regno; > > /* Used to list all values that contain memory reference. > May or may not contain the useless values - the list is compacted > @@ -338,7 +339,7 @@ cselib_reset_table (unsigned int num) > > if (cfa_base_preserved_val) > { > - unsigned int regno = REGNO (cfa_base_preserved_val->locs->loc); > + unsigned int regno = cfa_base_preserved_regno; > unsigned int new_used_regs = 0; > for (i = 0; i < n_used_regs; i++) > if (used_regs[i] == regno) > @@ -571,12 +572,15 @@ cselib_preserved_value_p (cselib_val *v) > never invalidated and preserved across cselib_reset_table calls. */ > > void > -cselib_preserve_cfa_base_value (cselib_val *v) > +cselib_preserve_cfa_base_value (cselib_val *v, unsigned int regno) > { > if (cselib_preserve_constants > && v->locs > && REG_P (v->locs->loc)) > - cfa_base_preserved_val = v; > + { > + cfa_base_preserved_val = v; > + cfa_base_preserved_regno = regno; > + } > } > > /* Clean all non-constant expressions in the hash table, but retain > @@ -1783,7 +1787,9 @@ cselib_invalidate_regno (unsigned int re > if (i < FIRST_PSEUDO_REGISTER && v != NULL) > this_last = end_hard_regno (GET_MODE (v->val_rtx), i) - 1; > > - if (this_last < regno || v == NULL || v == cfa_base_preserved_val) > + if (this_last < regno || v == NULL > + || (v == cfa_base_preserved_val > + && i == cfa_base_preserved_regno)) > { > l = &(*l)->next; > continue; > @@ -2266,6 +2272,7 @@ cselib_finish (void) > cselib_discard_hook = NULL; > cselib_preserve_constants = false; > cfa_base_preserved_val = NULL; > + cfa_base_preserved_regno = INVALID_REGNUM; > free_alloc_pool (elt_list_pool); > free_alloc_pool (elt_loc_list_pool); > free_alloc_pool (cselib_val_pool); > --- gcc/var-tracking.c.jj 2010-06-29 10:33:44.000000000 +0200 > +++ gcc/var-tracking.c 2010-06-29 13:46:07.000000000 +0200 > @@ -798,8 +798,9 @@ adjust_mems (rtx loc, const_rtx old_rtx, > switch (GET_CODE (loc)) > { > case REG: > - /* Don't do any sp or fp replacements outside of MEM addresses. */ > - if (amd->mem_mode == VOIDmode) > + /* Don't do any sp or fp replacements outside of MEM addresses > + on the LHS. */ > + if (amd->mem_mode == VOIDmode && amd->store) > return loc; > if (loc == stack_pointer_rtx > && !frame_pointer_needed) > @@ -5193,7 +5194,9 @@ reverse_op (rtx val, const_rtx expr) > return NULL_RTX; > } > > - if (!REG_P (XEXP (src, 0)) || !SCALAR_INT_MODE_P (GET_MODE (src))) > + if (!REG_P (XEXP (src, 0)) > + || !SCALAR_INT_MODE_P (GET_MODE (src)) > + || XEXP (src, 0) == cfa_base_rtx) > return NULL_RTX; > > v = cselib_lookup (XEXP (src, 0), GET_MODE (XEXP (src, 0)), 0); > @@ -8163,7 +8166,7 @@ vt_init_cfa_base (void) > val = cselib_lookup_from_insn (cfa_base_rtx, GET_MODE (cfa_base_rtx), 1, > get_insns ()); > preserve_value (val); > - cselib_preserve_cfa_base_value (val); > + cselib_preserve_cfa_base_value (val, REGNO (cfa_base_rtx)); > var_reg_decl_set (&VTI (ENTRY_BLOCK_PTR)->out, cfa_base_rtx, > VAR_INIT_STATUS_INITIALIZED, dv_from_value (val->val_rtx), > 0, NULL_RTX, INSERT); > > Jakub > >
On Tue, Jun 29, 2010 at 04:33:26PM +0200, Richard Guenther wrote: > On Tue, 29 Jun 2010, Jakub Jelinek wrote: > > On the ginac.ii testcase cc1plus spends huge amount of time in var-tracking. > > The problem is that there are many variables with values sp + const_int > > (e.g. this pointers for huge amount of methods) and code to handle > > reversible ops results in very long loc_chain lists (up to 3740 entries), > > where the sp value is equvalenced with (plus (some_other_value) (const_int N)) > > for many different values (and corresponding offsets). > > > > Fixed by canonicalizing sp (resp. hardfp) to (framep) + offset > > even outside of MEM addresses when on the RHS and not doing > > reversible ops for framep - which doesn't buy us anything, framep is always > > computable using DW_OP_fbreg anywhere in the function. > > > > The speedup for ginac.ii -g -O2 is from over 3 minutes to 16 seconds. > > > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > Ok. Does this also apply to the branch? I guess so, I'd just wait for a few days before backporting it. Jakub
On 06/29/2010 10:31 PM, Jakub Jelinek wrote: > Hi! > > On the ginac.ii testcase cc1plus spends huge amount of time in var-tracking. > The problem is that there are many variables with values sp + const_int > (e.g. this pointers for huge amount of methods) and code to handle > reversible ops results in very long loc_chain lists (up to 3740 entries), > where the sp value is equvalenced with (plus (some_other_value) (const_int N)) > for many different values (and corresponding offsets). > > Fixed by canonicalizing sp (resp. hardfp) to (framep) + offset > even outside of MEM addresses when on the RHS and not doing > reversible ops for framep - which doesn't buy us anything, framep is always > computable using DW_OP_fbreg anywhere in the function. > > The speedup for ginac.ii -g -O2 is from over 3 minutes to 16 seconds. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2010-06-29 Jakub Jelinek<jakub@redhat.com> > > PR debug/44694 > * cselib.h (cselib_preserve_cfa_base_value): Add regno argument. > * cselib.c (cfa_base_preserved_regno): New static variable. > (cselib_reset_table): Don't reset cfa_base_preserved_regno instead > of REGNO (cfa_base_preserved_val->locs->loc). > (cselib_preserve_cfa_base_value): Add regno argument, set > cfa_base_preserved_regno to it. > (cselib_invalidate_regno): Allow removal of registers other than > cfa_base_preserved_regno from cfa_base_preserved_val. > (cselib_finish): Set cfa_base_preserved_regno to INVALID_REGNUM. > * var-tracking.c (adjust_mems): Replace sp or hfp even outside > of MEM addresses, if not on LHS. > (reverse_op): Don't add reverse ops for cfa_base_rtx. > (vt_init_cfa_base): Adjust cselib_preserve_cfa_base_value caller. > This patch caused ICE for arm-none-eabi target: internal compiler error: in arm_dbx_register_number, at config/arm/arm.c:21155 when compiling thumb multilib libstdc++. > --- gcc/var-tracking.c.jj 2010-06-29 10:33:44.000000000 +0200 > +++ gcc/var-tracking.c 2010-06-29 13:46:07.000000000 +0200 > @@ -798,8 +798,9 @@ adjust_mems (rtx loc, const_rtx old_rtx, > switch (GET_CODE (loc)) > { > case REG: > - /* Don't do any sp or fp replacements outside of MEM addresses. */ > - if (amd->mem_mode == VOIDmode) > + /* Don't do any sp or fp replacements outside of MEM addresses > + on the LHS. */ > + if (amd->mem_mode == VOIDmode&& amd->store) > return loc; > if (loc == stack_pointer_rtx > && !frame_pointer_needed) If I revert this change, the ICE is gone. The ICE occurs when arm_dbx_register_number receives ARG_POINTER_REGNUM but it doesn't handle it since it isn't a hardware register. x86 has no such problem because DBX_REGISTER_NUMBER returns -1 for non hardware register. Regards,
--- gcc/cselib.h.jj 2010-03-31 13:12:00.000000000 +0200 +++ gcc/cselib.h 2010-06-29 13:45:37.000000000 +0200 @@ -99,6 +99,6 @@ extern unsigned int cselib_get_next_uid extern void cselib_preserve_value (cselib_val *); extern bool cselib_preserved_value_p (cselib_val *); extern void cselib_preserve_only_values (void); -extern void cselib_preserve_cfa_base_value (cselib_val *); +extern void cselib_preserve_cfa_base_value (cselib_val *, unsigned int); extern void dump_cselib_table (FILE *); --- gcc/cselib.c.jj 2010-05-25 11:27:47.000000000 +0200 +++ gcc/cselib.c 2010-06-29 14:05:59.000000000 +0200 @@ -178,6 +178,7 @@ static cselib_val dummy_val; that is constant through the whole function and should never be eliminated. */ static cselib_val *cfa_base_preserved_val; +static unsigned int cfa_base_preserved_regno; /* Used to list all values that contain memory reference. May or may not contain the useless values - the list is compacted @@ -338,7 +339,7 @@ cselib_reset_table (unsigned int num) if (cfa_base_preserved_val) { - unsigned int regno = REGNO (cfa_base_preserved_val->locs->loc); + unsigned int regno = cfa_base_preserved_regno; unsigned int new_used_regs = 0; for (i = 0; i < n_used_regs; i++) if (used_regs[i] == regno) @@ -571,12 +572,15 @@ cselib_preserved_value_p (cselib_val *v) never invalidated and preserved across cselib_reset_table calls. */ void -cselib_preserve_cfa_base_value (cselib_val *v) +cselib_preserve_cfa_base_value (cselib_val *v, unsigned int regno) { if (cselib_preserve_constants && v->locs && REG_P (v->locs->loc)) - cfa_base_preserved_val = v; + { + cfa_base_preserved_val = v; + cfa_base_preserved_regno = regno; + } } /* Clean all non-constant expressions in the hash table, but retain @@ -1783,7 +1787,9 @@ cselib_invalidate_regno (unsigned int re if (i < FIRST_PSEUDO_REGISTER && v != NULL) this_last = end_hard_regno (GET_MODE (v->val_rtx), i) - 1; - if (this_last < regno || v == NULL || v == cfa_base_preserved_val) + if (this_last < regno || v == NULL + || (v == cfa_base_preserved_val + && i == cfa_base_preserved_regno)) { l = &(*l)->next; continue; @@ -2266,6 +2272,7 @@ cselib_finish (void) cselib_discard_hook = NULL; cselib_preserve_constants = false; cfa_base_preserved_val = NULL; + cfa_base_preserved_regno = INVALID_REGNUM; free_alloc_pool (elt_list_pool); free_alloc_pool (elt_loc_list_pool); free_alloc_pool (cselib_val_pool); --- gcc/var-tracking.c.jj 2010-06-29 10:33:44.000000000 +0200 +++ gcc/var-tracking.c 2010-06-29 13:46:07.000000000 +0200 @@ -798,8 +798,9 @@ adjust_mems (rtx loc, const_rtx old_rtx, switch (GET_CODE (loc)) { case REG: - /* Don't do any sp or fp replacements outside of MEM addresses. */ - if (amd->mem_mode == VOIDmode) + /* Don't do any sp or fp replacements outside of MEM addresses + on the LHS. */ + if (amd->mem_mode == VOIDmode && amd->store) return loc; if (loc == stack_pointer_rtx && !frame_pointer_needed) @@ -5193,7 +5194,9 @@ reverse_op (rtx val, const_rtx expr) return NULL_RTX; } - if (!REG_P (XEXP (src, 0)) || !SCALAR_INT_MODE_P (GET_MODE (src))) + if (!REG_P (XEXP (src, 0)) + || !SCALAR_INT_MODE_P (GET_MODE (src)) + || XEXP (src, 0) == cfa_base_rtx) return NULL_RTX; v = cselib_lookup (XEXP (src, 0), GET_MODE (XEXP (src, 0)), 0); @@ -8163,7 +8166,7 @@ vt_init_cfa_base (void) val = cselib_lookup_from_insn (cfa_base_rtx, GET_MODE (cfa_base_rtx), 1, get_insns ()); preserve_value (val); - cselib_preserve_cfa_base_value (val); + cselib_preserve_cfa_base_value (val, REGNO (cfa_base_rtx)); var_reg_decl_set (&VTI (ENTRY_BLOCK_PTR)->out, cfa_base_rtx, VAR_INIT_STATUS_INITIALIZED, dv_from_value (val->val_rtx), 0, NULL_RTX, INSERT);