Message ID | 20170427120006.20564-13-rth@twiddle.net |
---|---|
State | New |
Headers | show |
Richard Henderson <rth@twiddle.net> writes: > From: "Emilio G. Cota" <cota@braap.org> > > Speed up indirect branches by jumping to the target if it is valid. > > Softmmu measurements (see later commit for user-mode numbers): > > Note: baseline (i.e. speedup == 1x) is QEMU v2.9.0. > > - SPECint06 (test set), x86_64-softmmu (Ubuntu 16.04 guest). Host: Intel i7-4790K @ 4.00GHz > > 2.4x +-+--------------------------------------------------------------------------------------------------------------+-+ > | | > | cross | > 2.2x +cross+jr..........................................................................+++...........................+-+ > | | | > | +++ | | > 2x +-+..............................................................................|..|............................+-+ > | | | | > | | | | > 1.8x +-+..............................................................................|####...........................+-+ > | |# |# | > | **** |# | > 1.6x +-+............................................................................*.|*.|#...........................+-+ > | * |* |# | > | * |* |# | > 1.4x +-+.......................................................................+++..*.|*.|#...........................+-+ > | ++++++ #### * |*++# +++ | > | +++ | | #++# *++* # +++ | | > 1.2x +-+......................###.....####....+++............|..|...........****..#.*..*..#....####...|.###.....####..+-+ > | +++ **** # **** # #### ***### *++* # * * # #++# ****|# +++#++# | > | ****### +++ *++* # *++* # ++# # #### *|* |# +++ * * # * * # *** # *| *|# **** # | > 1x +-++-*++*++#++***###++*++*+#++*+-*++#+****++#++***++#+-*+*++#-+****##++*++*-+#+*++*-+#++*+*++#++*-+*+#++*++*++#-++-+ > | * * # * * # * * # * * # * * # * * # *|* |# *++* # * * # * * # * * # * * # * * # | > | * * # * * # * * # * * # * * # * * # *+*++# * * # * * # * * # * * # * * # * * # | > 0.8x +-+--****###--***###--****##--****###-****###--***###--***###--****##--****###-****###--***###--****##--****###--+-+ > astar 
bzip2 gcc gobmk h264ref hmmer libquantum mcf omnetpp perlbench sjeng xalancbmk hmean > png: http://imgur.com/DU36YFU > > NB. 'cross' represents the previous commit. > > Reviewed-by: Richard Henderson <rth@twiddle.net> > Signed-off-by: Emilio G. Cota <cota@braap.org> > Message-Id: <1493263764-18657-11-git-send-email-cota@braap.org> > Signed-off-by: Richard Henderson <rth@twiddle.net> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> > --- > target/i386/translate.c | 14 ++++++++------ > 1 file changed, 8 insertions(+), 6 deletions(-) > > diff --git a/target/i386/translate.c b/target/i386/translate.c > index ea113fe..674ec96 100644 > --- a/target/i386/translate.c > +++ b/target/i386/translate.c > @@ -4996,7 +4996,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, > gen_push_v(s, cpu_T1); > gen_op_jmp_v(cpu_T0); > gen_bnd_jmp(s); > - gen_eob(s); > + gen_jr(s, cpu_T0); > break; > case 3: /* lcall Ev */ > gen_op_ld_v(s, ot, cpu_T1, cpu_A0); > @@ -5014,7 +5014,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, > tcg_const_i32(dflag - 1), > tcg_const_i32(s->pc - s->cs_base)); > } > - gen_eob(s); > + tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); > + gen_jr(s, cpu_tmp4); > break; > case 4: /* jmp Ev */ > if (dflag == MO_16) { > @@ -5022,7 +5023,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, > } > gen_op_jmp_v(cpu_T0); > gen_bnd_jmp(s); > - gen_eob(s); > + gen_jr(s, cpu_T0); > break; > case 5: /* ljmp Ev */ > gen_op_ld_v(s, ot, cpu_T1, cpu_A0); > @@ -5037,7 +5038,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, > gen_op_movl_seg_T0_vm(R_CS); > gen_op_jmp_v(cpu_T1); > } > - gen_eob(s); > + tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); > + gen_jr(s, cpu_tmp4); > break; > case 6: /* push Ev */ > gen_push_v(s, cpu_T0); > @@ -6417,7 +6419,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, > /* Note that gen_pop_T0 uses a zero-extending load.
*/ > gen_op_jmp_v(cpu_T0); > gen_bnd_jmp(s); > - gen_eob(s); > + gen_jr(s, cpu_T0); > break; > case 0xc3: /* ret */ > ot = gen_pop_T0(s); > @@ -6425,7 +6427,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, > /* Note that gen_pop_T0 uses a zero-extending load. */ > gen_op_jmp_v(cpu_T0); > gen_bnd_jmp(s); > - gen_eob(s); > + gen_jr(s, cpu_T0); > break; > case 0xca: /* lret im */ > val = cpu_ldsw_code(env, s->pc); -- Alex Bennée
diff --git a/target/i386/translate.c b/target/i386/translate.c index ea113fe..674ec96 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -4996,7 +4996,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_push_v(s, cpu_T1); gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 3: /* lcall Ev */ gen_op_ld_v(s, ot, cpu_T1, cpu_A0); @@ -5014,7 +5014,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_const_i32(dflag - 1), tcg_const_i32(s->pc - s->cs_base)); } - gen_eob(s); + tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); + gen_jr(s, cpu_tmp4); break; case 4: /* jmp Ev */ if (dflag == MO_16) { @@ -5022,7 +5023,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 5: /* ljmp Ev */ gen_op_ld_v(s, ot, cpu_T1, cpu_A0); @@ -5037,7 +5038,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_movl_seg_T0_vm(R_CS); gen_op_jmp_v(cpu_T1); } - gen_eob(s); + tcg_gen_ld_tl(cpu_tmp4, cpu_env, offsetof(CPUX86State, eip)); + gen_jr(s, cpu_tmp4); break; case 6: /* push Ev */ gen_push_v(s, cpu_T0); @@ -6417,7 +6419,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, /* Note that gen_pop_T0 uses a zero-extending load. */ gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 0xc3: /* ret */ ot = gen_pop_T0(s); @@ -6425,7 +6427,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, /* Note that gen_pop_T0 uses a zero-extending load. */ gen_op_jmp_v(cpu_T0); gen_bnd_jmp(s); - gen_eob(s); + gen_jr(s, cpu_T0); break; case 0xca: /* lret im */ val = cpu_ldsw_code(env, s->pc);