Message ID | f8105d40-90bd-725e-30d2-8921fa6a2c5f@suse.cz |
---|---|
State | New |
Headers | show |
Series | x86: Disable jump tables when retpolines are used (PR target/86952). | expand |
On Thu, Mar 7, 2019 at 9:45 AM Martin Liška <mliska@suse.cz> wrote: > > Hi. > > Thanks to Intel guys, we've done some re-measurement in PR86952 > about usage of jump tables when retpolines are used. > Numbers prove that disabling of JT should be the best for now. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? > Thanks, > Martin Please add a comment above your change. Uros. > > gcc/ChangeLog: > > 2019-03-06 Martin Liska <mliska@suse.cz> > > PR target/86952 > * config/i386/i386.c (ix86_option_override_internal): Disable > jump tables when retpolines are used. > > gcc/testsuite/ChangeLog: > > 2019-03-06 Martin Liska <mliska@suse.cz> > > PR target/86952 > * gcc.target/i386/pr86952.c: New test. > * gcc.target/i386/indirect-thunk-7.c: Use jump tables to match > scanned pattern. > * gcc.target/i386/indirect-thunk-inline-7.c: Likewise. > --- > gcc/config/i386/i386.c | 4 ++++ > .../gcc.target/i386/indirect-thunk-7.c | 2 +- > .../gcc.target/i386/indirect-thunk-inline-7.c | 2 +- > gcc/testsuite/gcc.target/i386/pr86952.c | 23 +++++++++++++++++++ > 4 files changed, 29 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr86952.c > >
On 3/7/19 9:54 AM, Uros Bizjak wrote: > On Thu, Mar 7, 2019 at 9:45 AM Martin Liška <mliska@suse.cz> wrote: >> >> Hi. >> >> Thanks to Intel guys, we've done some re-measurement in PR86952 >> about usage of jump tables when retpolines are used. >> Numbers prove that disabling of JT should be the best for now. >> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. >> >> Ready to be installed? >> Thanks, >> Martin > > Please add a comment above your change. Sure, should be improved. Martin > > Uros. > >> >> gcc/ChangeLog: >> >> 2019-03-06 Martin Liska <mliska@suse.cz> >> >> PR target/86952 >> * config/i386/i386.c (ix86_option_override_internal): Disable >> jump tables when retpolines are used. >> >> gcc/testsuite/ChangeLog: >> >> 2019-03-06 Martin Liska <mliska@suse.cz> >> >> PR target/86952 >> * gcc.target/i386/pr86952.c: New test. >> * gcc.target/i386/indirect-thunk-7.c: Use jump tables to match >> scanned pattern. >> * gcc.target/i386/indirect-thunk-inline-7.c: Likewise. >> --- >> gcc/config/i386/i386.c | 4 ++++ >> .../gcc.target/i386/indirect-thunk-7.c | 2 +- >> .../gcc.target/i386/indirect-thunk-inline-7.c | 2 +- >> gcc/testsuite/gcc.target/i386/pr86952.c | 23 +++++++++++++++++++ >> 4 files changed, 29 insertions(+), 2 deletions(-) >> create mode 100644 gcc/testsuite/gcc.target/i386/pr86952.c >> >> From 54a0f3ed784c05bef0bdddcc6ae4e8677307d989 Mon Sep 17 00:00:00 2001 From: marxin <mliska@suse.cz> Date: Wed, 6 Mar 2019 13:05:50 +0100 Subject: [PATCH] x86: Disable jump tables when retpolines are used (PR target/86952). Jump tables are implement on x86_64 with: jmp *.L4(,%rdi,8) where L4 contains list of labels where to jump. When using retpolines, the instruction is replaced with: movq .L4(,%rdi,8), %rax jmp *%rax which bypasses/confuses indirect branch predictor and it's slow. In that case, a decision tree based on if condition is faster. gcc/ChangeLog: 2019-03-06 Martin Liska <mliska@suse.cz> PR target/86952 * config/i386/i386.c (ix86_option_override_internal): Disable jump tables when retpolines are used. gcc/testsuite/ChangeLog: 2019-03-06 Martin Liska <mliska@suse.cz> PR target/86952 * gcc.target/i386/pr86952.c: New test. * gcc.target/i386/indirect-thunk-7.c: Use jump tables to match scanned pattern. * gcc.target/i386/indirect-thunk-inline-7.c: Likewise. --- gcc/config/i386/i386.c | 4 ++++ .../gcc.target/i386/indirect-thunk-7.c | 2 +- .../gcc.target/i386/indirect-thunk-inline-7.c | 2 +- gcc/testsuite/gcc.target/i386/pr86952.c | 23 +++++++++++++++++++ 4 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr86952.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c8f9957163b..37fe41260dd 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -4894,6 +4894,10 @@ ix86_option_override_internal (bool main_args_p, opts->x_param_values, opts_set->x_param_values); + if (ix86_indirect_branch != indirect_branch_keep + && !opts_set->x_flag_jump_tables) + opts->x_flag_jump_tables = 0; + return true; } diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c index 3c72036dbaf..53868f46558 100644 --- a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ +/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic -fjump-tables" } */ void func0 (void); void func1 (void); diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c index ea009245a58..e6f064959a1 100644 --- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ +/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic -fjump-tables" } */ void func0 (void); void func1 (void); diff --git a/gcc/testsuite/gcc.target/i386/pr86952.c b/gcc/testsuite/gcc.target/i386/pr86952.c new file mode 100644 index 00000000000..3ff3e354878 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr86952.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mindirect-branch=thunk -fdump-tree-switchlower1" } */ + +int global; + +int +foo (int x) +{ + switch (x & 7) + { + case 0: ; return 1722; + case 1: global += 1; return 1060; + case 2: ; return 1990; + case 3: ; return 1242; + case 4: ; return 1466; + case 5: ; return 894; + case 6: ; return 570; + case 7: ; return 572; + default: return 0; + } +} + +/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: 1 2 3 4 5 6 7" "switchlower1" } } */
On Thu, Mar 7, 2019 at 10:50 AM Martin Liška <mliska@suse.cz> wrote: > > On 3/7/19 9:54 AM, Uros Bizjak wrote: > > On Thu, Mar 7, 2019 at 9:45 AM Martin Liška <mliska@suse.cz> wrote: > >> > >> Hi. > >> > >> Thanks to Intel guys, we've done some re-measurement in PR86952 > >> about usage of jump tables when retpolines are used. > >> Numbers prove that disabling of JT should be the best for now. > >> > >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > >> > >> Ready to be installed? > >> Thanks, > >> Martin > > > > Please add a comment above your change. > > Sure, should be improved. Eh, we didn't understand each other... Please add comment here: + if (ix86_indirect_branch != indirect_branch_keep + && !opts_set->x_flag_jump_tables) + opts->x_flag_jump_tables = 0; so in future, it will still be documented why this part of the code is needed. Uros. > Martin > > > > > Uros. > > > >> > >> gcc/ChangeLog: > >> > >> 2019-03-06 Martin Liska <mliska@suse.cz> > >> > >> PR target/86952 > >> * config/i386/i386.c (ix86_option_override_internal): Disable > >> jump tables when retpolines are used. > >> > >> gcc/testsuite/ChangeLog: > >> > >> 2019-03-06 Martin Liska <mliska@suse.cz> > >> > >> PR target/86952 > >> * gcc.target/i386/pr86952.c: New test. > >> * gcc.target/i386/indirect-thunk-7.c: Use jump tables to match > >> scanned pattern. > >> * gcc.target/i386/indirect-thunk-inline-7.c: Likewise. > >> --- > >> gcc/config/i386/i386.c | 4 ++++ > >> .../gcc.target/i386/indirect-thunk-7.c | 2 +- > >> .../gcc.target/i386/indirect-thunk-inline-7.c | 2 +- > >> gcc/testsuite/gcc.target/i386/pr86952.c | 23 +++++++++++++++++++ > >> 4 files changed, 29 insertions(+), 2 deletions(-) > >> create mode 100644 gcc/testsuite/gcc.target/i386/pr86952.c > >> > >> >
On 3/7/19 11:27 AM, Uros Bizjak wrote: > On Thu, Mar 7, 2019 at 10:50 AM Martin Liška <mliska@suse.cz> wrote: >> >> On 3/7/19 9:54 AM, Uros Bizjak wrote: >>> On Thu, Mar 7, 2019 at 9:45 AM Martin Liška <mliska@suse.cz> wrote: >>>> >>>> Hi. >>>> >>>> Thanks to Intel guys, we've done some re-measurement in PR86952 >>>> about usage of jump tables when retpolines are used. >>>> Numbers prove that disabling of JT should be the best for now. >>>> >>>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. >>>> >>>> Ready to be installed? >>>> Thanks, >>>> Martin >>> >>> Please add a comment above your change. >> >> Sure, should be improved. > > Eh, we didn't understand each other... Please add comment here: Ah, this place :) > > + if (ix86_indirect_branch != indirect_branch_keep > + && !opts_set->x_flag_jump_tables) > + opts->x_flag_jump_tables = 0; > > so in future, it will still be documented why this part of the code is needed. Sure, updated. Martin > > Uros. > >> Martin >> >>> >>> Uros. >>> >>>> >>>> gcc/ChangeLog: >>>> >>>> 2019-03-06 Martin Liska <mliska@suse.cz> >>>> >>>> PR target/86952 >>>> * config/i386/i386.c (ix86_option_override_internal): Disable >>>> jump tables when retpolines are used. >>>> >>>> gcc/testsuite/ChangeLog: >>>> >>>> 2019-03-06 Martin Liska <mliska@suse.cz> >>>> >>>> PR target/86952 >>>> * gcc.target/i386/pr86952.c: New test. >>>> * gcc.target/i386/indirect-thunk-7.c: Use jump tables to match >>>> scanned pattern. >>>> * gcc.target/i386/indirect-thunk-inline-7.c: Likewise. >>>> --- >>>> gcc/config/i386/i386.c | 4 ++++ >>>> .../gcc.target/i386/indirect-thunk-7.c | 2 +- >>>> .../gcc.target/i386/indirect-thunk-inline-7.c | 2 +- >>>> gcc/testsuite/gcc.target/i386/pr86952.c | 23 +++++++++++++++++++ >>>> 4 files changed, 29 insertions(+), 2 deletions(-) >>>> create mode 100644 gcc/testsuite/gcc.target/i386/pr86952.c >>>> >>>> >> From f78914272ba7a9fd19db65876a4d4cab576ddf0f Mon Sep 17 00:00:00 2001 From: marxin <mliska@suse.cz> Date: Wed, 6 Mar 2019 13:05:50 +0100 Subject: [PATCH] x86: Disable jump tables when retpolines are used (PR target/86952). Jump tables are implement on x86_64 with: jmp *.L4(,%rdi,8) where L4 contains list of labels where to jump. When using retpolines, the instruction is replaced with: movq .L4(,%rdi,8), %rax jmp *%rax which bypasses/confuses indirect branch predictor and it's slow. In that case, a decision tree based on if condition is faster. gcc/ChangeLog: 2019-03-06 Martin Liska <mliska@suse.cz> PR target/86952 * config/i386/i386.c (ix86_option_override_internal): Disable jump tables when retpolines are used. gcc/testsuite/ChangeLog: 2019-03-06 Martin Liska <mliska@suse.cz> PR target/86952 * gcc.target/i386/pr86952.c: New test. * gcc.target/i386/indirect-thunk-7.c: Use jump tables to match scanned pattern. * gcc.target/i386/indirect-thunk-inline-7.c: Likewise. --- gcc/config/i386/i386.c | 6 +++++ .../gcc.target/i386/indirect-thunk-7.c | 2 +- .../gcc.target/i386/indirect-thunk-inline-7.c | 2 +- gcc/testsuite/gcc.target/i386/pr86952.c | 23 +++++++++++++++++++ 4 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr86952.c diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c8f9957163b..71e5cfd2897 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -4894,6 +4894,12 @@ ix86_option_override_internal (bool main_args_p, opts->x_param_values, opts_set->x_param_values); + /* PR86952: jump table usage with retpolines is slow. + The PR provides some numbers about the slowness. */ + if (ix86_indirect_branch != indirect_branch_keep + && !opts_set->x_flag_jump_tables) + opts->x_flag_jump_tables = 0; + return true; } diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c index 3c72036dbaf..53868f46558 100644 --- a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ +/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic -fjump-tables" } */ void func0 (void); void func1 (void); diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c index ea009245a58..e6f064959a1 100644 --- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ +/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic -fjump-tables" } */ void func0 (void); void func1 (void); diff --git a/gcc/testsuite/gcc.target/i386/pr86952.c b/gcc/testsuite/gcc.target/i386/pr86952.c new file mode 100644 index 00000000000..3ff3e354878 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr86952.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mindirect-branch=thunk -fdump-tree-switchlower1" } */ + +int global; + +int +foo (int x) +{ + switch (x & 7) + { + case 0: ; return 1722; + case 1: global += 1; return 1060; + case 2: ; return 1990; + case 3: ; return 1242; + case 4: ; return 1466; + case 5: ; return 894; + case 6: ; return 570; + case 7: ; return 572; + default: return 0; + } +} + +/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: 1 2 3 4 5 6 7" "switchlower1" } } */
> Hi. > > Thanks to Intel guys, we've done some re-measurement in PR86952 > about usage of jump tables when retpolines are used. > Numbers prove that disabling of JT should be the best for now. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? OK, thanks! I wonder if there is some threshold for extremely large jumptables where branchy sequence will loose, but I think it is better to disable them than what we have right now. Honza
On 3/8/19 1:44 PM, Jan Hubicka wrote: >> Hi. >> >> Thanks to Intel guys, we've done some re-measurement in PR86952 >> about usage of jump tables when retpolines are used. >> Numbers prove that disabling of JT should be the best for now. >> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. >> >> Ready to be installed? > > OK, thanks! > I wonder if there is some threshold for extremely large jumptables where > branchy sequence will loose, but I think it is better to disable them > than what we have right now. I tested switch statements up to 4096 and it was still slower ;) Martin > > Honza >
> On 3/8/19 1:44 PM, Jan Hubicka wrote: > >> Hi. > >> > >> Thanks to Intel guys, we've done some re-measurement in PR86952 > >> about usage of jump tables when retpolines are used. > >> Numbers prove that disabling of JT should be the best for now. > >> > >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > >> > >> Ready to be installed? > > > > OK, thanks! > > I wonder if there is some threshold for extremely large jumptables where > > branchy sequence will loose, but I think it is better to disable them > > than what we have right now. > > I tested switch statements up to 4096 and it was still slower ;) Well, we have switch statements with this many cases in insn-attrtab/latencytab tables. I suppose what kind of code you compile with retpolines, but I would expect even bigger switch statements to appear in real code (large DFA automatons and such) Honza > > Martin > > > > > Honza > > >
On 3/8/19 2:58 PM, Jan Hubicka wrote: >> On 3/8/19 1:44 PM, Jan Hubicka wrote: >>>> Hi. >>>> >>>> Thanks to Intel guys, we've done some re-measurement in PR86952 >>>> about usage of jump tables when retpolines are used. >>>> Numbers prove that disabling of JT should be the best for now. >>>> >>>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. >>>> >>>> Ready to be installed? >>> >>> OK, thanks! >>> I wonder if there is some threshold for extremely large jumptables where >>> branchy sequence will loose, but I think it is better to disable them >>> than what we have right now. >> >> I tested switch statements up to 4096 and it was still slower ;) > > Well, we have switch statements with this many cases in > insn-attrtab/latencytab tables. I suppose what kind of code you compile > with retpolines, but I would expect even bigger switch statements to > appear in real code (large DFA automatons and such) Question is whether you'll meet such huge switch statements in Linux kernel? Martin > > Honza >> >> Martin >> >>> >>> Honza >>> >>
On Fri, Mar 08, 2019 at 02:50:26PM +0100, Martin Liška wrote: > On 3/8/19 1:44 PM, Jan Hubicka wrote: > >> Hi. > >> > >> Thanks to Intel guys, we've done some re-measurement in PR86952 > >> about usage of jump tables when retpolines are used. > >> Numbers prove that disabling of JT should be the best for now. > >> > >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > >> > >> Ready to be installed? > > > > OK, thanks! > > I wonder if there is some threshold for extremely large jumptables where > > branchy sequence will loose, but I think it is better to disable them > > than what we have right now. > > I tested switch statements up to 4096 and it was still slower ;) Try one with 10000000 of entries ;) and also compare code size and data segment size. Jakub
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c8f9957163b..37fe41260dd 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -4894,6 +4894,10 @@ ix86_option_override_internal (bool main_args_p, opts->x_param_values, opts_set->x_param_values); + if (ix86_indirect_branch != indirect_branch_keep + && !opts_set->x_flag_jump_tables) + opts->x_flag_jump_tables = 0; + return true; } diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c index 3c72036dbaf..53868f46558 100644 --- a/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c +++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic" } */ +/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk -fno-pic -fjump-tables" } */ void func0 (void); void func1 (void); diff --git a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c index ea009245a58..e6f064959a1 100644 --- a/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c +++ b/gcc/testsuite/gcc.target/i386/indirect-thunk-inline-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic" } */ +/* { dg-options "-O2 -mno-indirect-branch-register -mfunction-return=keep -mindirect-branch=thunk-inline -fno-pic -fjump-tables" } */ void func0 (void); void func1 (void); diff --git a/gcc/testsuite/gcc.target/i386/pr86952.c b/gcc/testsuite/gcc.target/i386/pr86952.c new file mode 100644 index 00000000000..3ff3e354878 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr86952.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mindirect-branch=thunk -fdump-tree-switchlower1" } */ + +int global; + +int +foo (int x) +{ + switch (x & 7) + { + case 0: ; return 1722; + case 1: global += 1; return 1060; + case 2: ; return 1990; + case 3: ; return 1242; + case 4: ; return 1466; + case 5: ; return 894; + case 6: ; return 570; + case 7: ; return 572; + default: return 0; + } +} + +/* { dg-final { scan-tree-dump ";; GIMPLE switch case clusters: 1 2 3 4 5 6 7" "switchlower1" } } */