diff mbox

Var-tracking initialization fix (PR debug/63623)

Message ID 20141023073015.GZ10376@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Oct. 23, 2014, 7:30 a.m. UTC
Hi!

As I wrote in the PR, vt_stack_adjustments can often compute wrong offsets,
because it never considers pops with autoinc addressing, which can lead
either to wrong debug info, or turning off -fvar-tracking altogether for
a function on which that issue resulted in stack depth inconsistencies on
the edges.

Here are some stats from --enable-checking=yes,rtl cc1plus bootstrapped
with/without the patch (without the patch got then rebuilt stage3 with the
patch, i.e. just var-tracking.o and cc1plus-checksum.o got recompiled, so
I'm comparing identical code, different debug info):

x86_64-linux cc1plus built without the patch:
cov%	samples	cumul
0.0	506230/38%	506230/38%
0..10	10327/0%	516557/39%
11..20	12390/0%	528947/39%
21..30	31265/2%	560212/42%
31..40	18775/1%	578987/43%
41..50	20631/1%	599618/45%
51..60	24921/1%	624539/47%
61..70	40959/3%	665498/50%
71..80	23771/1%	689269/52%
81..90	41771/3%	731040/55%
91..99	81667/6%	812707/61%
100	510564/38%	1323271/100%

x86_64-linux cc1plus built with the patch:
cov%	samples	cumul
0.0	382214/28%	382214/28%
0..10	13100/0%	395314/29%
11..20	14568/1%	409882/30%
21..30	33708/2%	443590/33%
31..40	21927/1%	465517/35%
41..50	23924/1%	489441/36%
51..60	28736/2%	518177/39%
61..70	45847/3%	564024/42%
71..80	29284/2%	593308/44%
81..90	52085/3%	645393/48%
91..99	99971/7%	745364/56%
100	577907/43%	1323271/100%

i686-linux cc1plus built without the patch:
cov%	samples	cumul
0.0	631348/48%	631348/48%
0..10	7764/0%	639112/48%
11..20	9690/0%	648802/49%
21..30	25036/1%	673838/51%
31..40	16113/1%	689951/52%
41..50	19753/1%	709704/54%
51..60	14563/1%	724267/55%
61..70	34093/2%	758360/58%
71..80	17450/1%	775810/59%
81..90	31339/2%	807149/61%
91..99	60368/4%	867517/66%
100	437548/33%	1305065/100%

i686-linux cc1plus built with the patch:
cov%	samples	cumul
0.0	377352/28%	377352/28%
0..10	16077/1%	393429/30%
11..20	15390/1%	408819/31%
21..30	31790/2%	440609/33%
31..40	23889/1%	464498/35%
41..50	29267/2%	493765/37%
51..60	22902/1%	516667/39%
61..70	45629/3%	562296/43%
71..80	29511/2%	591807/45%
81..90	50536/3%	642343/49%
91..99	93584/7%	735927/56%
100	569138/43%	1305065/100%

.debug_info/.debug_loc sizes in bytes:
x86_64-linux cc1plus without patch .debug_info 75411710, .debug_loc 75421077
x86_64-linux cc1plus with    patch .debug_info 78498790, .debug_loc 90530117
  i686-linux cc1plus without patch .debug_info 59921183, .debug_loc 37823166
  i686-linux cc1plus with    patch .debug_info 63009554, .debug_loc 59535100

I've also performed instrumented bootstraps/regtests (x86_64-linux and
i686-linux), where I've logged in how many functions the result of
vt_stack_adjustments differed between the bad old way and new way.
In both the bootstraps/regtests, it affected 16892 32-bit and 6646 64-bit
functions, in all cases it was old way giving up and new way succeeding.

Not adding a testcase, as the one in the PR failed to produce proper debug
info only in 4.8 (then got latent), there already are some guality improvements with the
patch:
-FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions  line 21 i == v + 1
-FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-loops  line 21 i == v + 1
on x86_64 and:
-FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 20 y == 25
-FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 20 z == 6
-FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 23 y == 117
-FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 23 z == 8
-FAIL: gcc.dg/guality/pr54519-1.c   -O3 -fomit-frame-pointer  line 20 x == 36
-FAIL: gcc.dg/guality/pr54519-1.c   -O3 -fomit-frame-pointer  line 20 y == 25
-FAIL: gcc.dg/guality/pr54519-1.c   -O3 -fomit-frame-pointer  line 20 z == 6
-FAIL: gcc.dg/guality/pr54519-1.c   -O3 -g  line 20 x == 36
-FAIL: gcc.dg/guality/pr54519-1.c   -O3 -g  line 20 y == 25
-FAIL: gcc.dg/guality/pr54519-1.c   -O3 -g  line 20 z == 6
-FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 20 x == 36
-FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 20 y == 25
-FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 20 z == 6
-FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 23 x == 98
-FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 23 y == 117
-FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 23 z == 8
-FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto  line 20 x == 36
-FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto  line 23 x == 98
-FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto -flto-partition=none  line 20 x == 36
-FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto -flto-partition=none  line 23 x == 98
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 20 x == 36
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 20 y == 25
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 20 z == 6
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 23 x == 98
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 23 y == 117
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 23 z == 8
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 20 x == 36
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 20 y == 25
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 20 z == 6
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 23 x == 98
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 23 y == 117
-FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 23 z == 8
-FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions  line 21 i == v + 1
-FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-loops  line 21 i == v + 1
on i686.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Not sure about branches, to some extent this must be a serious debug info
quality regression, since the problem got significantly worse with omitting
frame pointer by default on i686, and/or shrink wrapping and simple_return
changes where there are more pops in the middle of functions.  So with a short
patch we could significantly improve debug info.  The risk is compile time
regressions, if we gave up on var-tracking, we threw away all the hard tracked
debug stmts/debug insns, but don't need to costly propagate the values through
the dataflow.

2014-10-23  Jakub Jelinek  <jakub@redhat.com>

	PR debug/63623
	* var-tracking.c (stack_adjust_offset_pre_post_cb): New function.
	(stack_adjust_offset_pre_post): Use it through for_each_inc_dec,
	instead of only handling autoinc in dest if it is a MEM.
	(vt_stack_adjustments): Fix up formatting.


	Jakub

Comments

Richard Biener Oct. 23, 2014, 11:14 a.m. UTC | #1
On Thu, Oct 23, 2014 at 9:30 AM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As I wrote in the PR, vt_stack_adjustments can often compute wrong offsets,
> because it never considers pops with autoinc addressing, which can lead
> either to wrong debug info, or turning off -fvar-tracking altogether for
> a function on which that issue resulted in stack depth inconsistencies on
> the edges.
>
> Here are some stats from --enable-checking=yes,rtl cc1plus bootstrapped
> with/without the patch (without the patch got then rebuilt stage3 with the
> patch, i.e. just var-tracking.o and cc1plus-checksum.o got recompiled, so
> I'm comparing identical code, different debug info):
>
> x86_64-linux cc1plus built without the patch:
> cov%    samples cumul
> 0.0     506230/38%      506230/38%
> 0..10   10327/0%        516557/39%
> 11..20  12390/0%        528947/39%
> 21..30  31265/2%        560212/42%
> 31..40  18775/1%        578987/43%
> 41..50  20631/1%        599618/45%
> 51..60  24921/1%        624539/47%
> 61..70  40959/3%        665498/50%
> 71..80  23771/1%        689269/52%
> 81..90  41771/3%        731040/55%
> 91..99  81667/6%        812707/61%
> 100     510564/38%      1323271/100%
>
> x86_64-linux cc1plus built with the patch:
> cov%    samples cumul
> 0.0     382214/28%      382214/28%
> 0..10   13100/0%        395314/29%
> 11..20  14568/1%        409882/30%
> 21..30  33708/2%        443590/33%
> 31..40  21927/1%        465517/35%
> 41..50  23924/1%        489441/36%
> 51..60  28736/2%        518177/39%
> 61..70  45847/3%        564024/42%
> 71..80  29284/2%        593308/44%
> 81..90  52085/3%        645393/48%
> 91..99  99971/7%        745364/56%
> 100     577907/43%      1323271/100%
>
> i686-linux cc1plus built without the patch:
> cov%    samples cumul
> 0.0     631348/48%      631348/48%
> 0..10   7764/0% 639112/48%
> 11..20  9690/0% 648802/49%
> 21..30  25036/1%        673838/51%
> 31..40  16113/1%        689951/52%
> 41..50  19753/1%        709704/54%
> 51..60  14563/1%        724267/55%
> 61..70  34093/2%        758360/58%
> 71..80  17450/1%        775810/59%
> 81..90  31339/2%        807149/61%
> 91..99  60368/4%        867517/66%
> 100     437548/33%      1305065/100%
>
> i686-linux cc1plus built with the patch:
> cov%    samples cumul
> 0.0     377352/28%      377352/28%
> 0..10   16077/1%        393429/30%
> 11..20  15390/1%        408819/31%
> 21..30  31790/2%        440609/33%
> 31..40  23889/1%        464498/35%
> 41..50  29267/2%        493765/37%
> 51..60  22902/1%        516667/39%
> 61..70  45629/3%        562296/43%
> 71..80  29511/2%        591807/45%
> 81..90  50536/3%        642343/49%
> 91..99  93584/7%        735927/56%
> 100     569138/43%      1305065/100%
>
> .debug_info/.debug_loc sizes in bytes:
> x86_64-linux cc1plus without patch .debug_info 75411710, .debug_loc 75421077
> x86_64-linux cc1plus with    patch .debug_info 78498790, .debug_loc 90530117
>   i686-linux cc1plus without patch .debug_info 59921183, .debug_loc 37823166
>   i686-linux cc1plus with    patch .debug_info 63009554, .debug_loc 59535100
>
> I've also performed instrumented bootstraps/regtests (x86_64-linux and
> i686-linux), where I've logged in how many functions the result of
> vt_stack_adjustments differed between the bad old way and new way.
> In both the bootstraps/regtests, it affected 16892 32-bit and 6646 64-bit
> functions, in all cases it was old way giving up and new way succeeding.
>
> Not adding a testcase, as the one in the PR failed to produce proper debug
> info only in 4.8 (then got latent), there already are some guality improvements with the
> patch:
> -FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions  line 21 i == v + 1
> -FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-loops  line 21 i == v + 1
> on x86_64 and:
> -FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 23 y == 117
> -FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 23 z == 8
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -fomit-frame-pointer  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -fomit-frame-pointer  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -fomit-frame-pointer  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -g  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -g  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -g  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 23 x == 98
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 23 y == 117
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 23 z == 8
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto  line 23 x == 98
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto -flto-partition=none  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto -flto-partition=none  line 23 x == 98
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 23 x == 98
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 23 y == 117
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 23 z == 8
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 23 x == 98
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 23 y == 117
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 23 z == 8
> -FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions  line 21 i == v + 1
> -FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-loops  line 21 i == v + 1
> on i686.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> Not sure about branches, to some extent this must be a serious debug info
> quality regression, since the problem got significantly worse with omitting
> frame pointer by default on i686, and/or shrink wrapping and simple_return
> changes where there are more pops in the middle of functions.  So with a short
> patch we could significantly improve debug info.  The risk is compile time
> regressions, if we gave up on var-tracking, we threw away all the hard tracked
> debug stmts/debug insns, but don't need to costly propagate the values through
> the dataflow.

I think if the patch does not hit 4.9.2 (which I guess it won't) then both
4.8 and 4.9 are too mature to get this kind of patch.

Richard.

> 2014-10-23  Jakub Jelinek  <jakub@redhat.com>
>
>         PR debug/63623
>         * var-tracking.c (stack_adjust_offset_pre_post_cb): New function.
>         (stack_adjust_offset_pre_post): Use it through for_each_inc_dec,
>         instead of only handling autoinc in dest if it is a MEM.
>         (vt_stack_adjustments): Fix up formatting.
>
> --- gcc/var-tracking.c.jj       2014-09-01 09:43:58.000000000 +0200
> +++ gcc/var-tracking.c  2014-10-22 22:08:42.985717561 +0200
> @@ -700,6 +700,39 @@ static void vt_add_function_parameters (
>  static bool vt_initialize (void);
>  static void vt_finalize (void);
>
> +/* Callback for stack_adjust_offset_pre_post, called via for_each_inc_dec.  */
> +
> +static int
> +stack_adjust_offset_pre_post_cb (rtx, rtx op, rtx dest, rtx src, rtx srcoff,
> +                                void *arg)
> +{
> +  if (dest != stack_pointer_rtx)
> +    return 0;
> +
> +  switch (GET_CODE (op))
> +    {
> +    case PRE_INC:
> +    case PRE_DEC:
> +      ((HOST_WIDE_INT *)arg)[0] -= INTVAL (srcoff);
> +      return 0;
> +    case POST_INC:
> +    case POST_DEC:
> +      ((HOST_WIDE_INT *)arg)[1] -= INTVAL (srcoff);
> +      return 0;
> +    case PRE_MODIFY:
> +    case POST_MODIFY:
> +      /* We handle only adjustments by constant amount.  */
> +      gcc_assert (GET_CODE (src) == PLUS
> +                 && CONST_INT_P (XEXP (src, 1))
> +                 && XEXP (src, 0) == stack_pointer_rtx);
> +      ((HOST_WIDE_INT *)arg)[GET_CODE (op) == POST_MODIFY]
> +       -= INTVAL (XEXP (src, 1));
> +      return 0;
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +
>  /* Given a SET, calculate the amount of stack adjustment it contains
>     PRE- and POST-modifying stack pointer.
>     This function is similar to stack_adjust_offset.  */
> @@ -725,68 +758,12 @@ stack_adjust_offset_pre_post (rtx patter
>         *post += INTVAL (XEXP (src, 1));
>        else
>         *post -= INTVAL (XEXP (src, 1));
> +      return;
>      }
> -  else if (MEM_P (dest))
> -    {
> -      /* (set (mem (pre_dec (reg sp))) (foo)) */
> -      src = XEXP (dest, 0);
> -      code = GET_CODE (src);
> -
> -      switch (code)
> -       {
> -       case PRE_MODIFY:
> -       case POST_MODIFY:
> -         if (XEXP (src, 0) == stack_pointer_rtx)
> -           {
> -             rtx val = XEXP (XEXP (src, 1), 1);
> -             /* We handle only adjustments by constant amount.  */
> -             gcc_assert (GET_CODE (XEXP (src, 1)) == PLUS &&
> -                         CONST_INT_P (val));
> -
> -             if (code == PRE_MODIFY)
> -               *pre -= INTVAL (val);
> -             else
> -               *post -= INTVAL (val);
> -             break;
> -           }
> -         return;
> -
> -       case PRE_DEC:
> -         if (XEXP (src, 0) == stack_pointer_rtx)
> -           {
> -             *pre += GET_MODE_SIZE (GET_MODE (dest));
> -             break;
> -           }
> -         return;
> -
> -       case POST_DEC:
> -         if (XEXP (src, 0) == stack_pointer_rtx)
> -           {
> -             *post += GET_MODE_SIZE (GET_MODE (dest));
> -             break;
> -           }
> -         return;
> -
> -       case PRE_INC:
> -         if (XEXP (src, 0) == stack_pointer_rtx)
> -           {
> -             *pre -= GET_MODE_SIZE (GET_MODE (dest));
> -             break;
> -           }
> -         return;
> -
> -       case POST_INC:
> -         if (XEXP (src, 0) == stack_pointer_rtx)
> -           {
> -             *post -= GET_MODE_SIZE (GET_MODE (dest));
> -             break;
> -           }
> -         return;
> -
> -       default:
> -         return;
> -       }
> -    }
> +  HOST_WIDE_INT res[2] = { 0, 0 };
> +  for_each_inc_dec (pattern, stack_adjust_offset_pre_post_cb, res);
> +  *pre += res[0];
> +  *post += res[1];
>  }
>
>  /* Given an INSN, calculate the amount of stack adjustment it contains
> @@ -836,10 +813,10 @@ vt_stack_adjustments (void)
>
>    /* Initialize entry block.  */
>    VTI (ENTRY_BLOCK_PTR_FOR_FN (cfun))->visited = true;
> -  VTI (ENTRY_BLOCK_PTR_FOR_FN (cfun))->in.stack_adjust =
> - INCOMING_FRAME_SP_OFFSET;
> -  VTI (ENTRY_BLOCK_PTR_FOR_FN (cfun))->out.stack_adjust =
> - INCOMING_FRAME_SP_OFFSET;
> +  VTI (ENTRY_BLOCK_PTR_FOR_FN (cfun))->in.stack_adjust
> +    = INCOMING_FRAME_SP_OFFSET;
> +  VTI (ENTRY_BLOCK_PTR_FOR_FN (cfun))->out.stack_adjust
> +    = INCOMING_FRAME_SP_OFFSET;
>
>    /* Allocate stack for back-tracking up CFG.  */
>    stack = XNEWVEC (edge_iterator, n_basic_blocks_for_fn (cfun) + 1);
>
>         Jakub
Jeff Law Oct. 23, 2014, 9:35 p.m. UTC | #2
On 10/23/14 01:30, Jakub Jelinek wrote:
> Hi!
>
> As I wrote in the PR, vt_stack_adjustments can often compute wrong offsets,
> because it never considers pops with autoinc addressing, which can lead
> either to wrong debug info, or turning off -fvar-tracking altogether for
> a function on which that issue resulted in stack depth inconsistencies on
> the edges.
>
> Here are some stats from --enable-checking=yes,rtl cc1plus bootstrapped
> with/without the patch (without the patch got then rebuilt stage3 with the
> patch, i.e. just var-tracking.o and cc1plus-checksum.o got recompiled, so
> I'm comparing identical code, different debug info):
>
> x86_64-linux cc1plus built without the patch:
> cov%	samples	cumul
> 0.0	506230/38%	506230/38%
> 0..10	10327/0%	516557/39%
> 11..20	12390/0%	528947/39%
> 21..30	31265/2%	560212/42%
> 31..40	18775/1%	578987/43%
> 41..50	20631/1%	599618/45%
> 51..60	24921/1%	624539/47%
> 61..70	40959/3%	665498/50%
> 71..80	23771/1%	689269/52%
> 81..90	41771/3%	731040/55%
> 91..99	81667/6%	812707/61%
> 100	510564/38%	1323271/100%
>
> x86_64-linux cc1plus built with the patch:
> cov%	samples	cumul
> 0.0	382214/28%	382214/28%
> 0..10	13100/0%	395314/29%
> 11..20	14568/1%	409882/30%
> 21..30	33708/2%	443590/33%
> 31..40	21927/1%	465517/35%
> 41..50	23924/1%	489441/36%
> 51..60	28736/2%	518177/39%
> 61..70	45847/3%	564024/42%
> 71..80	29284/2%	593308/44%
> 81..90	52085/3%	645393/48%
> 91..99	99971/7%	745364/56%
> 100	577907/43%	1323271/100%
>
> i686-linux cc1plus built without the patch:
> cov%	samples	cumul
> 0.0	631348/48%	631348/48%
> 0..10	7764/0%	639112/48%
> 11..20	9690/0%	648802/49%
> 21..30	25036/1%	673838/51%
> 31..40	16113/1%	689951/52%
> 41..50	19753/1%	709704/54%
> 51..60	14563/1%	724267/55%
> 61..70	34093/2%	758360/58%
> 71..80	17450/1%	775810/59%
> 81..90	31339/2%	807149/61%
> 91..99	60368/4%	867517/66%
> 100	437548/33%	1305065/100%
>
> i686-linux cc1plus built with the patch:
> cov%	samples	cumul
> 0.0	377352/28%	377352/28%
> 0..10	16077/1%	393429/30%
> 11..20	15390/1%	408819/31%
> 21..30	31790/2%	440609/33%
> 31..40	23889/1%	464498/35%
> 41..50	29267/2%	493765/37%
> 51..60	22902/1%	516667/39%
> 61..70	45629/3%	562296/43%
> 71..80	29511/2%	591807/45%
> 81..90	50536/3%	642343/49%
> 91..99	93584/7%	735927/56%
> 100	569138/43%	1305065/100%
>
> .debug_info/.debug_loc sizes in bytes:
> x86_64-linux cc1plus without patch .debug_info 75411710, .debug_loc 75421077
> x86_64-linux cc1plus with    patch .debug_info 78498790, .debug_loc 90530117
>    i686-linux cc1plus without patch .debug_info 59921183, .debug_loc 37823166
>    i686-linux cc1plus with    patch .debug_info 63009554, .debug_loc 59535100
>
> I've also performed instrumented bootstraps/regtests (x86_64-linux and
> i686-linux), where I've logged in how many functions the result of
> vt_stack_adjustments differed between the bad old way and new way.
> In both the bootstraps/regtests, it affected 16892 32-bit and 6646 64-bit
> functions, in all cases it was old way giving up and new way succeeding.
>
> Not adding a testcase, as the one in the PR failed to produce proper debug
> info only in 4.8 (then got latent), there already are some guality improvements with the
> patch:
> -FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions  line 21 i == v + 1
> -FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-loops  line 21 i == v + 1
> on x86_64 and:
> -FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 23 y == 117
> -FAIL: gcc.dg/guality/pr54519-1.c   -O2  line 23 z == 8
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -fomit-frame-pointer  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -fomit-frame-pointer  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -fomit-frame-pointer  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -g  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -g  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-1.c   -O3 -g  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 23 x == 98
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 23 y == 117
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2  line 23 z == 8
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto  line 23 x == 98
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto -flto-partition=none  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-3.c   -O2 -flto -flto-partition=none  line 23 x == 98
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 23 x == 98
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 23 y == 117
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -fomit-frame-pointer  line 23 z == 8
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 20 x == 36
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 20 y == 25
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 20 z == 6
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 23 x == 98
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 23 y == 117
> -FAIL: gcc.dg/guality/pr54519-3.c   -O3 -g  line 23 z == 8
> -FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions  line 21 i == v + 1
> -FAIL: gcc.dg/guality/pr54693-2.c   -O3 -fomit-frame-pointer -funroll-loops  line 21 i == v + 1
> on i686.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> Not sure about branches, to some extent this must be a serious debug info
> quality regression, since the problem got significantly worse with omitting
> frame pointer by default on i686, and/or shrink wrapping and simple_return
> changes where there are more pops in the middle of functions.  So with a short
> patch we could significantly improve debug info.  The risk is compile time
> regressions, if we gave up on var-tracking, we threw away all the hard tracked
> debug stmts/debug insns, but don't need to costly propagate the values through
> the dataflow.
>
> 2014-10-23  Jakub Jelinek  <jakub@redhat.com>
>
> 	PR debug/63623
> 	* var-tracking.c (stack_adjust_offset_pre_post_cb): New function.
> 	(stack_adjust_offset_pre_post): Use it through for_each_inc_dec,
> 	instead of only handling autoinc in dest if it is a MEM.
> 	(vt_stack_adjustments): Fix up formatting.
OK.

Jeff
diff mbox

Patch

--- gcc/var-tracking.c.jj	2014-09-01 09:43:58.000000000 +0200
+++ gcc/var-tracking.c	2014-10-22 22:08:42.985717561 +0200
@@ -700,6 +700,39 @@  static void vt_add_function_parameters (
 static bool vt_initialize (void);
 static void vt_finalize (void);
 
+/* Callback for stack_adjust_offset_pre_post, called via for_each_inc_dec.  */
+
+static int
+stack_adjust_offset_pre_post_cb (rtx, rtx op, rtx dest, rtx src, rtx srcoff,
+				 void *arg)
+{
+  if (dest != stack_pointer_rtx)
+    return 0;
+
+  switch (GET_CODE (op))
+    {
+    case PRE_INC:
+    case PRE_DEC:
+      ((HOST_WIDE_INT *)arg)[0] -= INTVAL (srcoff);
+      return 0;
+    case POST_INC:
+    case POST_DEC:
+      ((HOST_WIDE_INT *)arg)[1] -= INTVAL (srcoff);
+      return 0;
+    case PRE_MODIFY:
+    case POST_MODIFY:
+      /* We handle only adjustments by constant amount.  */
+      gcc_assert (GET_CODE (src) == PLUS
+		  && CONST_INT_P (XEXP (src, 1))
+		  && XEXP (src, 0) == stack_pointer_rtx);
+      ((HOST_WIDE_INT *)arg)[GET_CODE (op) == POST_MODIFY]
+	-= INTVAL (XEXP (src, 1));
+      return 0;
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Given a SET, calculate the amount of stack adjustment it contains
    PRE- and POST-modifying stack pointer.
    This function is similar to stack_adjust_offset.  */
@@ -725,68 +758,12 @@  stack_adjust_offset_pre_post (rtx patter
 	*post += INTVAL (XEXP (src, 1));
       else
 	*post -= INTVAL (XEXP (src, 1));
+      return;	
     }
-  else if (MEM_P (dest))
-    {
-      /* (set (mem (pre_dec (reg sp))) (foo)) */
-      src = XEXP (dest, 0);
-      code = GET_CODE (src);
-
-      switch (code)
-	{
-	case PRE_MODIFY:
-	case POST_MODIFY:
-	  if (XEXP (src, 0) == stack_pointer_rtx)
-	    {
-	      rtx val = XEXP (XEXP (src, 1), 1);
-	      /* We handle only adjustments by constant amount.  */
-	      gcc_assert (GET_CODE (XEXP (src, 1)) == PLUS &&
-			  CONST_INT_P (val));
-
-	      if (code == PRE_MODIFY)
-		*pre -= INTVAL (val);
-	      else
-		*post -= INTVAL (val);
-	      break;
-	    }
-	  return;
-
-	case PRE_DEC:
-	  if (XEXP (src, 0) == stack_pointer_rtx)
-	    {
-	      *pre += GET_MODE_SIZE (GET_MODE (dest));
-	      break;
-	    }
-	  return;
-
-	case POST_DEC:
-	  if (XEXP (src, 0) == stack_pointer_rtx)
-	    {
-	      *post += GET_MODE_SIZE (GET_MODE (dest));
-	      break;
-	    }
-	  return;
-
-	case PRE_INC:
-	  if (XEXP (src, 0) == stack_pointer_rtx)
-	    {
-	      *pre -= GET_MODE_SIZE (GET_MODE (dest));
-	      break;
-	    }
-	  return;
-
-	case POST_INC:
-	  if (XEXP (src, 0) == stack_pointer_rtx)
-	    {
-	      *post -= GET_MODE_SIZE (GET_MODE (dest));
-	      break;
-	    }
-	  return;
-
-	default:
-	  return;
-	}
-    }
+  HOST_WIDE_INT res[2] = { 0, 0 };
+  for_each_inc_dec (pattern, stack_adjust_offset_pre_post_cb, res);
+  *pre += res[0];
+  *post += res[1];
 }
 
 /* Given an INSN, calculate the amount of stack adjustment it contains
@@ -836,10 +813,10 @@  vt_stack_adjustments (void)
 
   /* Initialize entry block.  */
   VTI (ENTRY_BLOCK_PTR_FOR_FN (cfun))->visited = true;
-  VTI (ENTRY_BLOCK_PTR_FOR_FN (cfun))->in.stack_adjust =
- INCOMING_FRAME_SP_OFFSET;
-  VTI (ENTRY_BLOCK_PTR_FOR_FN (cfun))->out.stack_adjust =
- INCOMING_FRAME_SP_OFFSET;
+  VTI (ENTRY_BLOCK_PTR_FOR_FN (cfun))->in.stack_adjust
+    = INCOMING_FRAME_SP_OFFSET;
+  VTI (ENTRY_BLOCK_PTR_FOR_FN (cfun))->out.stack_adjust
+    = INCOMING_FRAME_SP_OFFSET;
 
   /* Allocate stack for back-tracking up CFG.  */
   stack = XNEWVEC (edge_iterator, n_basic_blocks_for_fn (cfun) + 1);