diff mbox

[4/N] Recover GOTO predictor.

Message ID faba6d46-0f9e-f768-e2dc-5b661565c529@suse.cz
State New
Headers show

Commit Message

Martin Liška June 21, 2017, 1:06 p.m. UTC
Hello.

There's one additional predictor enhancement that is GOTO predict that
used to working. Following patch adds expect statement for C and C++ family
languages.

There's one fallout which is vrp24.c test-case, where only 'Simplified relational'
appears just once. Adding Richi and Patrick who can probably help how to fix the
test-case.

Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.

Ready to be installed?
Martin
;; Function sss (sss, funcdef_no=0, decl_uid=1811, cgraph_uid=0, symbol_order=0)

;; 3 loops found
;;
;; Loop 0
;;  header 0, latch 1
;;  depth 0, outer -1
;;  nodes: 0 1 2 3 4 5 6 7 8 9 10 11 12
;; 2 succs { 10 3 }
;; 3 succs { 4 8 }
;; 4 succs { 10 5 }
;; 5 succs { 12 }
;; 6 succs { 7 }
;; 7 succs { 9 }
;; 8 succs { 12 }
;; 9 succs { 12 }
;; 10 succs { 6 11 }
;; 11 succs { 9 }
;; 12 succs { 1 }

SSA form after inserting ASSERT_EXPRs
sss (struct rtx_def * insn, int code1, int code2, int code3)
{
  int D1544;
  int n_sets;
  struct rtx_def * body;
  _Bool D1562;

  <bb 2> [100.00%] [count: INV]:
  body_5 = insn_4(D)->u.fld[5].rt_rtx;
  D1544_6 = body_5->code;
  if (D1544_6 == 55)
    goto <bb 10> (L7); [34.00%] [count: INV]
  else
    goto <bb 3>; [66.00%] [count: INV]

  <bb 3> [66.00%] [count: INV]:
  if (code3_7(D) == 99)
    goto <bb 4>; [34.00%] [count: INV]
  else
    goto <bb 8>; [66.00%] [count: INV]

  <bb 4> [22.44%] [count: INV]:
  D1562_9 = code1_8(D) == 10;
  n_sets_10 = (int) D1562_9;
  if (n_sets_10 > 0)
    goto <bb 10> (L7); [64.00%] [count: INV]
  else
    goto <bb 5>; [36.00%] [count: INV]

  <bb 5> [8.10%] [count: INV]:
  # n_sets_1 = PHI <0(4)>
  goto <bb 12> (L16); [100.00%] [count: INV]

  <bb 6> [9.79%] [count: INV]:
  arf ();

  <bb 7> [9.82%] [count: INV]:
  # n_sets_19 = PHI <1(6)>
  goto <bb 9>; [100.00%] [count: INV]

  <bb 8> [43.56%] [count: INV]:
  # n_sets_21 = PHI <0(3)>
  goto <bb 12> (L16); [100.00%] [count: INV]

  <bb 9> [46.68%] [count: INV]:
  frob ();
  goto <bb 12> (L16); [100.00%] [count: INV]

L7 [48.36%] [count: INV]:
  if (code2_12(D) == 42)
    goto <bb 6>; [20.24%] [count: INV]
  else
    goto <bb 11>; [79.76%] [count: INV]

  <bb 11> [38.61%] [count: INV]:
  # n_sets_17 = PHI <1(10)>
  goto <bb 9>; [100.00%] [count: INV]

L16 [100.00%] [count: INV]:
  return;

}


Immediate_uses: 

n_sets_1 : --> no uses.

.MEM_2 : --> single use.
# VUSE <.MEM_2>
return;

.MEM_3(D) : -->6 uses.
.MEM_22 = PHI <.MEM_3(D)(3)>
.MEM_18 = PHI <.MEM_3(D)(10)>
.MEM_16 = PHI <.MEM_3(D)(4)>
# .MEM_13 = VDEF <.MEM_3(D)>
arf ();
# VUSE <.MEM_3(D)>
D1544_6 = body_5->code;
# VUSE <.MEM_3(D)>
body_5 = insn_4(D)->u.fld[5].rt_rtx;

insn_4(D) : --> single use.
body_5 = insn_4(D)->u.fld[5].rt_rtx;

body_5 : --> single use.
D1544_6 = body_5->code;

D1544_6 : --> single use.
if (D1544_6 == 55)

code3_7(D) : --> single use.
if (code3_7(D) == 99)

code1_8(D) : --> single use.
D1562_9 = code1_8(D) == 10;

D1562_9 : --> single use.
n_sets_10 = (int) D1562_9;

n_sets_10 : --> single use.
if (n_sets_10 > 0)

code2_12(D) : --> single use.
if (code2_12(D) == 42)

.MEM_13 : --> single use.
.MEM_20 = PHI <.MEM_13(6)>

.MEM_14 : --> single use.
.MEM_2 = PHI <.MEM_14(9), .MEM_22(8), .MEM_16(5)>

.MEM_16 : --> single use.
.MEM_2 = PHI <.MEM_14(9), .MEM_22(8), .MEM_16(5)>

n_sets_17 : --> no uses.

.MEM_18 : --> single use.
.MEM_23 = PHI <.MEM_20(7), .MEM_18(11)>

n_sets_19 : --> no uses.

.MEM_20 : --> single use.
.MEM_23 = PHI <.MEM_20(7), .MEM_18(11)>

n_sets_21 : --> no uses.

.MEM_22 : --> single use.
.MEM_2 = PHI <.MEM_14(9), .MEM_22(8), .MEM_16(5)>

.MEM_23 : --> single use.
# .MEM_14 = VDEF <.MEM_23>
frob ();

Adding destination of edge (0 -> 2) to worklist

Simulating block 2

Visiting statement:
if (D1544_6 == 55)

Visiting conditional with predicate: if (D1544_6 == 55)

With known ranges
	D1544_6: VARYING

Predicate evaluates to: DON'T KNOW
Adding destination of edge (2 -> 10) to worklist
Adding destination of edge (2 -> 3) to worklist

Simulating block 3

Visiting statement:
if (code3_7(D) == 99)

Visiting conditional with predicate: if (code3_7(D) == 99)

With known ranges
	code3_7(D): []

Predicate evaluates to: DON'T KNOW
Adding destination of edge (3 -> 4) to worklist
Adding destination of edge (3 -> 8) to worklist

Simulating block 8

Visiting PHI node: n_sets_21 = PHI <0(3)>
    Argument #0 (3 -> 8 executable)
	0: [0, 0]
Intersecting
  [0, 0]
and
  [0, 1]
to
  [0, 0]
Found new range for n_sets_21: [0, 0]
marking stmt to be not simulated again
Adding destination of edge (8 -> 12) to worklist

Simulating block 4

Visiting statement:
D1562_9 = code1_8(D) == 10;
Intersecting
  [0, +INF]
and
  [0, +INF]
to
  [0, +INF]
Found new range for D1562_9: [0, +INF]
marking stmt to be not simulated again

Visiting statement:
n_sets_10 = (int) D1562_9;
Intersecting
  [0, 1]
and
  [0, 1]
to
  [0, 1]
Found new range for n_sets_10: [0, 1]
marking stmt to be not simulated again

Visiting statement:
if (n_sets_10 > 0)

Visiting conditional with predicate: if (n_sets_10 > 0)

With known ranges
	n_sets_10: [0, 1]

Predicate evaluates to: DON'T KNOW
Adding destination of edge (4 -> 10) to worklist
Adding destination of edge (4 -> 5) to worklist

Simulating block 5

Visiting PHI node: n_sets_1 = PHI <0(4)>
    Argument #0 (4 -> 5 executable)
	0: [0, 0]
Intersecting
  [0, 0]
and
  [0, 1]
to
  [0, 0]
Found new range for n_sets_1: [0, 0]
marking stmt to be not simulated again
Adding destination of edge (5 -> 12) to worklist

Simulating block 10

Visiting statement:
if (code2_12(D) == 42)

Visiting conditional with predicate: if (code2_12(D) == 42)

With known ranges
	code2_12(D): []

Predicate evaluates to: DON'T KNOW
Adding destination of edge (10 -> 6) to worklist
Adding destination of edge (10 -> 11) to worklist

Simulating block 11

Visiting PHI node: n_sets_17 = PHI <1(10)>
    Argument #0 (10 -> 11 executable)
	1: [1, 1]
Intersecting
  [1, 1]
and
  [0, 1]
to
  [1, 1]
Found new range for n_sets_17: [1, 1]
marking stmt to be not simulated again
Adding destination of edge (11 -> 9) to worklist

Simulating block 6
Adding destination of edge (6 -> 7) to worklist

Simulating block 7

Visiting PHI node: n_sets_19 = PHI <1(6)>
    Argument #0 (6 -> 7 executable)
	1: [1, 1]
Intersecting
  [1, 1]
and
  [0, 1]
to
  [1, 1]
Found new range for n_sets_19: [1, 1]
marking stmt to be not simulated again
Adding destination of edge (7 -> 9) to worklist

Simulating block 9
Adding destination of edge (9 -> 12) to worklist

Simulating block 12

Visiting statement:
return;

Value ranges after VRP:

n_sets_1: [0, 0]
.MEM_2: VARYING
body_5: VARYING
D1544_6: VARYING
code3_7(D): VARYING
code1_8(D): VARYING
D1562_9: [0, +INF]
n_sets_10: [0, 1]
code2_12(D): VARYING
.MEM_16: VARYING
n_sets_17: [1, 1]
.MEM_18: VARYING
n_sets_19: [1, 1]
.MEM_20: VARYING
n_sets_21: [0, 0]
.MEM_22: VARYING
.MEM_23: VARYING



Substituting values and folding statements

Folding statement: body_5 = insn_4(D)->u.fld[5].rt_rtx;
Not folded
Folding statement: D1544_6 = body_5->code;
Not folded
Folding statement: if (D1544_6 == 55)
Not folded
Folding statement: if (code3_7(D) == 99)
Not folded
Folding statement: D1562_9 = code1_8(D) == 10;
Not folded
Folding statement: n_sets_10 = (int) D1562_9;
Not folded
Folding statement: if (n_sets_10 > 0)
Simplified relational if (n_sets_10 > 0)
 into if (n_sets_10 == 1)

Folded into: if (n_sets_10 == 1)

Folding statement: L7 [48.36%] [count: INV]:
Not folded
Folding statement: if (code2_12(D) == 42)
Not folded
Folding statement: arf ();
Not folded
Folding statement: frob ();
Not folded
Folding statement: L16 [100.00%] [count: INV]:
Not folded
Folding statement: return;
Not folded
Removing dead stmt n_sets_19 = PHI <1(6)>

Removing dead stmt n_sets_17 = PHI <1(10)>

Removing dead stmt n_sets_1 = PHI <0(4)>

Removing dead stmt n_sets_21 = PHI <0(3)>

LKUP STMT code2_12(D) eq_expr 42
0>>> COPY .MEM_18 = .MEM_3(D)
<<<< COPY .MEM_18 = .MEM_3(D)
0>>> COPY .MEM_16 = .MEM_3(D)
0>>> COPY .MEM_2 = .MEM_3(D)
<<<< COPY .MEM_2 = .MEM_3(D)
<<<< COPY .MEM_16 = .MEM_3(D)
0>>> COPY .MEM_22 = .MEM_3(D)
0>>> COPY .MEM_2 = .MEM_3(D)
<<<< COPY .MEM_2 = .MEM_3(D)
<<<< COPY .MEM_22 = .MEM_3(D)
0>>> COPY .MEM_20 = .MEM_13
0>>> COPY .MEM_23 = .MEM_13
<<<< COPY .MEM_23 = .MEM_13
<<<< COPY .MEM_20 = .MEM_13
0>>> COPY .MEM_18 = .MEM_3(D)
0>>> COPY .MEM_23 = .MEM_3(D)
<<<< COPY .MEM_23 = .MEM_3(D)
<<<< COPY .MEM_18 = .MEM_3(D)
LKUP STMT code2_12(D) eq_expr 42
0>>> COPY .MEM_18 = .MEM_3(D)
<<<< COPY .MEM_18 = .MEM_3(D)
Folded into: if (D1562_9 == 1)

Merging blocks 6 and 7
fix_loop_structure: fixing up loops for function
sss (struct rtx_def * insn, int code1, int code2, int code3)
{
  int D1544;
  int n_sets;
  struct rtx_def * body;
  _Bool D1562;

  <bb 2> [100.00%] [count: INV]:
  body_5 = insn_4(D)->u.fld[5].rt_rtx;
  D1544_6 = body_5->code;
  if (D1544_6 == 55)
    goto <bb 9> (L7); [34.00%] [count: INV]
  else
    goto <bb 3>; [66.00%] [count: INV]

  <bb 3> [66.00%] [count: INV]:
  if (code3_7(D) == 99)
    goto <bb 4>; [34.00%] [count: INV]
  else
    goto <bb 7>; [66.00%] [count: INV]

  <bb 4> [22.44%] [count: INV]:
  D1562_9 = code1_8(D) == 10;
  n_sets_10 = (int) D1562_9;
  if (D1562_9 == 1)
    goto <bb 9> (L7); [64.00%] [count: INV]
  else
    goto <bb 5>; [36.00%] [count: INV]

  <bb 5> [8.10%] [count: INV]:
  goto <bb 11> (L16); [100.00%] [count: INV]

  <bb 6> [9.82%] [count: INV]:
  arf ();
  goto <bb 8>; [100.00%] [count: INV]

  <bb 7> [43.56%] [count: INV]:
  goto <bb 11> (L16); [100.00%] [count: INV]

  <bb 8> [46.68%] [count: INV]:
  frob ();
  goto <bb 11> (L16); [100.00%] [count: INV]

L7 [48.36%] [count: INV]:
  if (code2_12(D) == 42)
    goto <bb 6>; [20.24%] [count: INV]
  else
    goto <bb 10>; [79.76%] [count: INV]

  <bb 10> [38.61%] [count: INV]:
  goto <bb 8>; [100.00%] [count: INV]

L16 [100.00%] [count: INV]:
  return;

}

Comments

Martin Liška June 21, 2017, 1:09 p.m. UTC | #1
On 06/21/2017 03:06 PM, Martin Liška wrote:
> Hello.
> 
> There's one additional predictor enhancement that is GOTO predict that
> used to working. Following patch adds expect statement for C and C++ family
> languages.
> 
> There's one fallout which is vrp24.c test-case, where only 'Simplified relational'
> appears just once. Adding Richi and Patrick who can probably help how to fix the
> test-case.
> 
> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
> 
> Ready to be installed?
> Martin
> 

And I forgot to mention hitrate on SPEC2017:

HEURISTICS                               BRANCHES  (REL)  BR. HITRATE            HITRATE       COVERAGE COVERAGE  (REL)  predict.def  (REL)
goto                                          622   1.0%       64.31%   65.92% /  83.70%      725127790  725.13M   0.1%

Which says it's quite rare predictor, but with quite nice hitrate.

Martin
Richard Biener June 22, 2017, 10:27 a.m. UTC | #2
On Wed, Jun 21, 2017 at 3:06 PM, Martin Liška <mliska@suse.cz> wrote:
> Hello.
>
> There's one additional predictor enhancement that is GOTO predict that
> used to working. Following patch adds expect statement for C and C++ family
> languages.
>
> There's one fallout which is vrp24.c test-case, where only 'Simplified relational'
> appears just once. Adding Richi and Patrick who can probably help how to fix the
> test-case.

Happens to be optimized better now, there's only one predicate to simplify
left in the IL input to VRP1.  I suggest to change it to match 1 times and add
-fdump-tree-optimized to dg-options and

/* { dg-final { scan-tree-dump-times "if " 3 "optimized" } } */

to verify we have 3 conditions left.

> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>
> Ready to be installed?
> Martin
Martin Liška June 22, 2017, 10:57 a.m. UTC | #3
On 06/22/2017 12:27 PM, Richard Biener wrote:
> On Wed, Jun 21, 2017 at 3:06 PM, Martin Liška <mliska@suse.cz> wrote:
>> Hello.
>>
>> There's one additional predictor enhancement that is GOTO predict that
>> used to working. Following patch adds expect statement for C and C++ family
>> languages.
>>
>> There's one fallout which is vrp24.c test-case, where only 'Simplified relational'
>> appears just once. Adding Richi and Patrick who can probably help how to fix the
>> test-case.
> 
> Happens to be optimized better now, there's only one predicate to simplify
> left in the IL input to VRP1.  I suggest to change it to match 1 times and add
> -fdump-tree-optimized to dg-options and
> 
> /* { dg-final { scan-tree-dump-times "if " 3 "optimized" } } */
> 
> to verify we have 3 conditions left.

Thanks for help.
What about the comment:

/* The first n_sets > 0 test can be simplfiied into n_sets == 1 since
   n_sets can only have the values [0, 1] as it's the result of a
   boolean operation.

   The second n_sets > 0 test can also be simplified into n_sets == 1
   as the only way to reach the tests is when n_sets <= 1 and the only
   value which satisfies both conditions is n_sets == 1.  */

I guess just only one can be valid? Or is it a different story now?

Martin

> 
>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>
>> Ready to be installed?
>> Martin
Richard Biener June 22, 2017, 11:47 a.m. UTC | #4
On Thu, Jun 22, 2017 at 12:57 PM, Martin Liška <mliska@suse.cz> wrote:
> On 06/22/2017 12:27 PM, Richard Biener wrote:
>> On Wed, Jun 21, 2017 at 3:06 PM, Martin Liška <mliska@suse.cz> wrote:
>>> Hello.
>>>
>>> There's one additional predictor enhancement that is GOTO predict that
>>> used to working. Following patch adds expect statement for C and C++ family
>>> languages.
>>>
>>> There's one fallout which is vrp24.c test-case, where only 'Simplified relational'
>>> appears just once. Adding Richi and Patrick who can probably help how to fix the
>>> test-case.
>>
>> Happens to be optimized better now, there's only one predicate to simplify
>> left in the IL input to VRP1.  I suggest to change it to match 1 times and add
>> -fdump-tree-optimized to dg-options and
>>
>> /* { dg-final { scan-tree-dump-times "if " 3 "optimized" } } */
>>
>> to verify we have 3 conditions left.
>
> Thanks for help.
> What about the comment:
>
> /* The first n_sets > 0 test can be simplfiied into n_sets == 1 since
>    n_sets can only have the values [0, 1] as it's the result of a
>    boolean operation.
>
>    The second n_sets > 0 test can also be simplified into n_sets == 1
>    as the only way to reach the tests is when n_sets <= 1 and the only
>    value which satisfies both conditions is n_sets == 1.  */
>
> I guess just only one can be valid? Or is it a different story now?

The 2nd one is handled by earlier jump-threading.

> Martin
>
>>
>>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>>
>>> Ready to be installed?
>>> Martin
>
Martin Liška June 30, 2017, 9:24 a.m. UTC | #5
PING^1

Can you please Honza give a formal approval for the patch?

Thanks,
Martin

On 06/22/2017 01:47 PM, Richard Biener wrote:
> On Thu, Jun 22, 2017 at 12:57 PM, Martin Liška <mliska@suse.cz> wrote:
>> On 06/22/2017 12:27 PM, Richard Biener wrote:
>>> On Wed, Jun 21, 2017 at 3:06 PM, Martin Liška <mliska@suse.cz> wrote:
>>>> Hello.
>>>>
>>>> There's one additional predictor enhancement that is GOTO predict that
>>>> used to working. Following patch adds expect statement for C and C++ family
>>>> languages.
>>>>
>>>> There's one fallout which is vrp24.c test-case, where only 'Simplified relational'
>>>> appears just once. Adding Richi and Patrick who can probably help how to fix the
>>>> test-case.
>>>
>>> Happens to be optimized better now, there's only one predicate to simplify
>>> left in the IL input to VRP1.  I suggest to change it to match 1 times and add
>>> -fdump-tree-optimized to dg-options and
>>>
>>> /* { dg-final { scan-tree-dump-times "if " 3 "optimized" } } */
>>>
>>> to verify we have 3 conditions left.
>>
>> Thanks for help.
>> What about the comment:
>>
>> /* The first n_sets > 0 test can be simplfiied into n_sets == 1 since
>>    n_sets can only have the values [0, 1] as it's the result of a
>>    boolean operation.
>>
>>    The second n_sets > 0 test can also be simplified into n_sets == 1
>>    as the only way to reach the tests is when n_sets <= 1 and the only
>>    value which satisfies both conditions is n_sets == 1.  */
>>
>> I guess just only one can be valid? Or is it a different story now?
> 
> The 2nd one is handled by earlier jump-threading.
> 
>> Martin
>>
>>>
>>>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>>>
>>>> Ready to be installed?
>>>> Martin
>>
Jan Hubicka June 30, 2017, 12:37 p.m. UTC | #6
> PING^1
> 
> Can you please Honza give a formal approval for the patch?

OK,
thanks!
Honza
> 
> Thanks,
> Martin
> 
> On 06/22/2017 01:47 PM, Richard Biener wrote:
> > On Thu, Jun 22, 2017 at 12:57 PM, Martin Liška <mliska@suse.cz> wrote:
> >> On 06/22/2017 12:27 PM, Richard Biener wrote:
> >>> On Wed, Jun 21, 2017 at 3:06 PM, Martin Liška <mliska@suse.cz> wrote:
> >>>> Hello.
> >>>>
> >>>> There's one additional predictor enhancement that is GOTO predict that
> >>>> used to working. Following patch adds expect statement for C and C++ family
> >>>> languages.
> >>>>
> >>>> There's one fallout which is vrp24.c test-case, where only 'Simplified relational'
> >>>> appears just once. Adding Richi and Patrick who can probably help how to fix the
> >>>> test-case.
> >>>
> >>> Happens to be optimized better now, there's only one predicate to simplify
> >>> left in the IL input to VRP1.  I suggest to change it to match 1 times and add
> >>> -fdump-tree-optimized to dg-options and
> >>>
> >>> /* { dg-final { scan-tree-dump-times "if " 3 "optimized" } } */
> >>>
> >>> to verify we have 3 conditions left.
> >>
> >> Thanks for help.
> >> What about the comment:
> >>
> >> /* The first n_sets > 0 test can be simplfiied into n_sets == 1 since
> >>    n_sets can only have the values [0, 1] as it's the result of a
> >>    boolean operation.
> >>
> >>    The second n_sets > 0 test can also be simplified into n_sets == 1
> >>    as the only way to reach the tests is when n_sets <= 1 and the only
> >>    value which satisfies both conditions is n_sets == 1.  */
> >>
> >> I guess just only one can be valid? Or is it a different story now?
> > 
> > The 2nd one is handled by earlier jump-threading.
> > 
> >> Martin
> >>
> >>>
> >>>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
> >>>>
> >>>> Ready to be installed?
> >>>> Martin
> >>
Martin Liška June 30, 2017, 1:48 p.m. UTC | #7
On 06/22/2017 12:27 PM, Richard Biener wrote:
> On Wed, Jun 21, 2017 at 3:06 PM, Martin Liška <mliska@suse.cz> wrote:
>> Hello.
>>
>> There's one additional predictor enhancement that is GOTO predict that
>> used to working. Following patch adds expect statement for C and C++ family
>> languages.
>>
>> There's one fallout which is vrp24.c test-case, where only 'Simplified relational'
>> appears just once. Adding Richi and Patrick who can probably help how to fix the
>> test-case.
> 
> Happens to be optimized better now, there's only one predicate to simplify
> left in the IL input to VRP1.  I suggest to change it to match 1 times and add
> -fdump-tree-optimized to dg-options and
> 
> /* { dg-final { scan-tree-dump-times "if " 3 "optimized" } } */
> 
> to verify we have 3 conditions left.

One small note, I see 4 conditions in optimized dump:

sss (struct rtx_def * insn, int code1, int code2, int code3)
{
  int D1544;
  struct rtx_def * body;
  _Bool D1562;

  <bb 2> [100.00%] [count: INV]:
  body_5 = insn_4(D)->u.fld[5].rt_rtx;
  D1544_6 = body_5->code;
  if (D1544_6 == 55)
    goto <bb 7> (L7); [34.00%] [count: INV]
  else
    goto <bb 3>; [66.00%] [count: INV]

  <bb 3> [66.00%] [count: INV]:
  if (code3_7(D) == 99)
    goto <bb 4>; [34.00%] [count: INV]
  else
    goto <bb 8> (L16); [66.00%] [count: INV]

  <bb 4> [22.44%] [count: INV]:
  D1562_9 = code1_8(D) == 10;
  if (D1562_9 == 1)
    goto <bb 7> (L7); [64.00%] [count: INV]
  else
    goto <bb 8> (L16); [36.00%] [count: INV]

  <bb 5> [9.82%] [count: INV]:
  arf ();

  <bb 6> [46.68%] [count: INV]:
  frob (); [tail call]
  goto <bb 8> (L16); [100.00%] [count: INV]

L7 [48.36%] [count: INV]:
  if (code2_12(D) == 42)
    goto <bb 5>; [20.24%] [count: INV]
  else
    goto <bb 6>; [79.76%] [count: INV]

L16 [100.00%] [count: INV]:
  return;

}

Is it a problem or adjusting to 4 is fine?

Martin

> 
>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>
>> Ready to be installed?
>> Martin
Martin Liška July 31, 2017, 7:46 a.m. UTC | #8
Richi?

Thanks

On 06/30/2017 03:48 PM, Martin Liška wrote:
> On 06/22/2017 12:27 PM, Richard Biener wrote:
>> On Wed, Jun 21, 2017 at 3:06 PM, Martin Liška <mliska@suse.cz> wrote:
>>> Hello.
>>>
>>> There's one additional predictor enhancement that is GOTO predict that
>>> used to working. Following patch adds expect statement for C and C++ family
>>> languages.
>>>
>>> There's one fallout which is vrp24.c test-case, where only 'Simplified relational'
>>> appears just once. Adding Richi and Patrick who can probably help how to fix the
>>> test-case.
>>
>> Happens to be optimized better now, there's only one predicate to simplify
>> left in the IL input to VRP1.  I suggest to change it to match 1 times and add
>> -fdump-tree-optimized to dg-options and
>>
>> /* { dg-final { scan-tree-dump-times "if " 3 "optimized" } } */
>>
>> to verify we have 3 conditions left.
> 
> One small note, I see 4 conditions in optimized dump:
> 
> sss (struct rtx_def * insn, int code1, int code2, int code3)
> {
>   int D1544;
>   struct rtx_def * body;
>   _Bool D1562;
> 
>   <bb 2> [100.00%] [count: INV]:
>   body_5 = insn_4(D)->u.fld[5].rt_rtx;
>   D1544_6 = body_5->code;
>   if (D1544_6 == 55)
>     goto <bb 7> (L7); [34.00%] [count: INV]
>   else
>     goto <bb 3>; [66.00%] [count: INV]
> 
>   <bb 3> [66.00%] [count: INV]:
>   if (code3_7(D) == 99)
>     goto <bb 4>; [34.00%] [count: INV]
>   else
>     goto <bb 8> (L16); [66.00%] [count: INV]
> 
>   <bb 4> [22.44%] [count: INV]:
>   D1562_9 = code1_8(D) == 10;
>   if (D1562_9 == 1)
>     goto <bb 7> (L7); [64.00%] [count: INV]
>   else
>     goto <bb 8> (L16); [36.00%] [count: INV]
> 
>   <bb 5> [9.82%] [count: INV]:
>   arf ();
> 
>   <bb 6> [46.68%] [count: INV]:
>   frob (); [tail call]
>   goto <bb 8> (L16); [100.00%] [count: INV]
> 
> L7 [48.36%] [count: INV]:
>   if (code2_12(D) == 42)
>     goto <bb 5>; [20.24%] [count: INV]
>   else
>     goto <bb 6>; [79.76%] [count: INV]
> 
> L16 [100.00%] [count: INV]:
>   return;
> 
> }
> 
> Is it a problem or adjusting to 4 is fine?
> 
> Martin
> 
>>
>>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>>
>>> Ready to be installed?
>>> Martin
>
Richard Biener July 31, 2017, 8:55 a.m. UTC | #9
On Mon, Jul 31, 2017 at 9:46 AM, Martin Liška <mliska@suse.cz> wrote:
> Richi?

4 is fine.

> Thanks
>
> On 06/30/2017 03:48 PM, Martin Liška wrote:
>> On 06/22/2017 12:27 PM, Richard Biener wrote:
>>> On Wed, Jun 21, 2017 at 3:06 PM, Martin Liška <mliska@suse.cz> wrote:
>>>> Hello.
>>>>
>>>> There's one additional predictor enhancement that is GOTO predict that
>>>> used to working. Following patch adds expect statement for C and C++ family
>>>> languages.
>>>>
>>>> There's one fallout which is vrp24.c test-case, where only 'Simplified relational'
>>>> appears just once. Adding Richi and Patrick who can probably help how to fix the
>>>> test-case.
>>>
>>> Happens to be optimized better now, there's only one predicate to simplify
>>> left in the IL input to VRP1.  I suggest to change it to match 1 times and add
>>> -fdump-tree-optimized to dg-options and
>>>
>>> /* { dg-final { scan-tree-dump-times "if " 3 "optimized" } } */
>>>
>>> to verify we have 3 conditions left.
>>
>> One small note, I see 4 conditions in optimized dump:
>>
>> sss (struct rtx_def * insn, int code1, int code2, int code3)
>> {
>>   int D1544;
>>   struct rtx_def * body;
>>   _Bool D1562;
>>
>>   <bb 2> [100.00%] [count: INV]:
>>   body_5 = insn_4(D)->u.fld[5].rt_rtx;
>>   D1544_6 = body_5->code;
>>   if (D1544_6 == 55)
>>     goto <bb 7> (L7); [34.00%] [count: INV]
>>   else
>>     goto <bb 3>; [66.00%] [count: INV]
>>
>>   <bb 3> [66.00%] [count: INV]:
>>   if (code3_7(D) == 99)
>>     goto <bb 4>; [34.00%] [count: INV]
>>   else
>>     goto <bb 8> (L16); [66.00%] [count: INV]
>>
>>   <bb 4> [22.44%] [count: INV]:
>>   D1562_9 = code1_8(D) == 10;
>>   if (D1562_9 == 1)
>>     goto <bb 7> (L7); [64.00%] [count: INV]
>>   else
>>     goto <bb 8> (L16); [36.00%] [count: INV]
>>
>>   <bb 5> [9.82%] [count: INV]:
>>   arf ();
>>
>>   <bb 6> [46.68%] [count: INV]:
>>   frob (); [tail call]
>>   goto <bb 8> (L16); [100.00%] [count: INV]
>>
>> L7 [48.36%] [count: INV]:
>>   if (code2_12(D) == 42)
>>     goto <bb 5>; [20.24%] [count: INV]
>>   else
>>     goto <bb 6>; [79.76%] [count: INV]
>>
>> L16 [100.00%] [count: INV]:
>>   return;
>>
>> }
>>
>> Is it a problem or adjusting to 4 is fine?
>>
>> Martin
>>
>>>
>>>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>>>
>>>> Ready to be installed?
>>>> Martin
>>
>
diff mbox

Patch

From 4963172e22489a83caa866854386b6d8b5a62f7a Mon Sep 17 00:00:00 2001
From: marxin <mliska@suse.cz>
Date: Mon, 19 Jun 2017 15:44:34 +0200
Subject: [PATCH] Recover GOTO predictor.

gcc/c/ChangeLog:

2017-06-21  Jan Hubicka <hubicka@ucw.cz>
	    Martin Liska  <mliska@suse.cz>

	* c-typeck.c (c_finish_goto_label): Build gimple predict
	stament.

gcc/ChangeLog:

2017-06-21  Jan Hubicka <hubicka@ucw.cz>
	    Martin Liska  <mliska@suse.cz>

	* predict.def: Remove old comment and adjust probability.
	* gimplify.c (should_warn_for_implicit_fallthrough): Ignore
	PREDICT statements.

gcc/testsuite/ChangeLog:

2017-06-21  Jan Hubicka <hubicka@ucw.cz>
	    Martin Liska  <mliska@suse.cz>

	* gcc.dg/predict-15.c: New test.
	* gcc.dg/tree-ssa/attr-hotcold-2.c: Update the test-case.

gcc/cp/ChangeLog:

2017-06-21  Jan Hubicka <hubicka@ucw.cz>
	    Martin Liska  <mliska@suse.cz>

	* pt.c (tsubst_copy): Copy PREDICT_EXPR.
	* semantics.c (finish_goto_stmt): Build gimple predict
	stament.
	* constexpr.c (potential_constant_expression_1): Handle
	PREDICT_EXPR.
---
 gcc/c/c-typeck.c                               |  1 +
 gcc/cp/constexpr.c                             |  1 +
 gcc/cp/pt.c                                    |  2 ++
 gcc/cp/semantics.c                             |  2 ++
 gcc/gimplify.c                                 |  4 +++-
 gcc/predict.def                                |  5 ++---
 gcc/testsuite/gcc.dg/predict-15.c              | 17 +++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c | 13 ++++++-------
 8 files changed, 34 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/predict-15.c

diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 4d067e96dd3..c677c3e9ff5 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -9824,6 +9824,7 @@  c_finish_goto_label (location_t loc, tree label)
     return NULL_TREE;
   TREE_USED (decl) = 1;
   {
+    add_stmt (build_predict_expr (PRED_GOTO, NOT_TAKEN));
     tree t = build1 (GOTO_EXPR, void_type_node, decl);
     SET_EXPR_LOCATION (t, loc);
     return add_stmt (t);
diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 5a574524866..788e02d7372 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -5784,6 +5784,7 @@  potential_constant_expression_1 (tree t, bool want_rval, bool strict,
 
     case CLEANUP_STMT:
     case EMPTY_CLASS_EXPR:
+    case PREDICT_EXPR:
       return false;
 
     case GOTO_EXPR:
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 69ca9291960..445b46da2de 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -15111,6 +15111,8 @@  tsubst_copy (tree t, tree args, tsubst_flags_t complain, tree in_decl)
       return tsubst_binary_left_fold (t, args, complain, in_decl);
     case BINARY_RIGHT_FOLD_EXPR:
       return tsubst_binary_right_fold (t, args, complain, in_decl);
+    case PREDICT_EXPR:
+      return t;
 
     default:
       /* We shouldn't get here, but keep going if !flag_checking.  */
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 5fe772a49e3..a5fcc7b2c63 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -41,6 +41,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "omp-general.h"
 #include "convert.h"
 #include "gomp-constants.h"
+#include "predict.h"
 
 /* There routines provide a modular interface to perform many parsing
    operations.  They may therefore be used during actual parsing, or
@@ -630,6 +631,7 @@  finish_goto_stmt (tree destination)
 
   check_goto (destination);
 
+  add_stmt (build_predict_expr (PRED_GOTO, NOT_TAKEN));
   return add_stmt (build_stmt (input_location, GOTO_EXPR, destination));
 }
 
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index f45d3bfd13a..0ea8d4ea07b 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -2042,7 +2042,9 @@  should_warn_for_implicit_fallthrough (gimple_stmt_iterator *gsi_p, tree label)
   gsi = *gsi_p;
 
   /* Skip all immediately following labels.  */
-  while (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
+  while (!gsi_end_p (gsi)
+	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
+	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_PREDICT))
     gsi_next (&gsi);
 
   /* { ... something; default:; } */
diff --git a/gcc/predict.def b/gcc/predict.def
index f7b2bf7738c..326c39ae4c9 100644
--- a/gcc/predict.def
+++ b/gcc/predict.def
@@ -132,9 +132,8 @@  DEF_PREDICTOR (PRED_RECURSIVE_CALL, "recursive call", HITRATE (75), 0)
 DEF_PREDICTOR (PRED_TREE_EARLY_RETURN, "early return (on trees)", HITRATE (66),
 	       0)
 
-/* Branch containing goto is probably not taken.
-   FIXME: Currently not used.  */
-DEF_PREDICTOR (PRED_GOTO, "goto", HITRATE (70), 0)
+/* Branch containing goto is probably not taken.  */
+DEF_PREDICTOR (PRED_GOTO, "goto", HITRATE (66), 0)
 
 /* Branch ending with return constant is probably not taken.  */
 DEF_PREDICTOR (PRED_CONST_RETURN, "const return", HITRATE (69), 0)
diff --git a/gcc/testsuite/gcc.dg/predict-15.c b/gcc/testsuite/gcc.dg/predict-15.c
new file mode 100644
index 00000000000..2a8c3ea8597
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/predict-15.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+int main(int argc, char **argv)
+{
+  if (argc == 123)
+    goto exit;
+  else
+    {
+      return 0;
+    }
+
+exit:
+  return 1;
+}
+
+/* { dg-final { scan-tree-dump "goto heuristics of edge" "profile_estimate"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c b/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c
index 184dd10ddae..d5999e1bf6f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c
@@ -1,8 +1,7 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-profile_estimate-blocks-details" } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
 
-void g(void);
-void h(void);
+int v1, v2;
 void f(int x, int y)
 {
   if (x) goto A;
@@ -10,19 +9,19 @@  void f(int x, int y)
   return;
 
  A: __attribute__((cold))
-  g();
+  v1 = x;
   return;
 
  B: __attribute__((hot))
-  h();
+  v2 = y;
   return;
 }
 
 /* { dg-final { scan-tree-dump-times "hot label heuristics" 1 "profile_estimate" } } */
 /* { dg-final { scan-tree-dump-times "cold label heuristics" 1 "profile_estimate" } } */
-/* { dg-final { scan-tree-dump "A \\\[0\\\..*\\\]" "profile_estimate" } } */
+/* { dg-final { scan-tree-dump "combined heuristics: 0\\\..*" "profile_estimate" } } */
 
 /* Note: we're attempting to match some number > 6000, i.e. > 60%.
    The exact number ought to be tweekable without having to juggle
    the testcase around too much.  */
-/* { dg-final { scan-tree-dump "B \\\[\[6-9\]\[0-9\]\\\..*\\\]" "profile_estimate" } } */
+/* { dg-final { scan-tree-dump "combined heuristics: \[6-9\]\[0-9\]\\\..*" "profile_estimate" } } */
-- 
2.13.1