Message ID | 1349526621-13939-15-git-send-email-pbonzini@redhat.com |
---|---|
State | New |
Headers | show |
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote: > Reconstruct the arguments for complex conditions involving CC_OP_SUBx (BE, > L, LE). In the others do it via setcond and gen_setcc_slow (which is > not that slow in many cases). I think it would be useful to reconstruct also for add, inc and dec along the same lines, the others are probably not so often used. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > target-i386/translate.c | 93 +++++++++++++++++++------------------------------ > 1 file modificato, 36 inserzioni(+), 57 rimozioni(-) > > diff --git a/target-i386/translate.c b/target-i386/translate.c > index 342b9ec..92e8291 100644 > --- a/target-i386/translate.c > +++ b/target-i386/translate.c > @@ -1063,55 +1063,55 @@ static inline void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool in > } > } > > -/* return true if setcc_slow is not needed (WARNING: must be kept in > - sync with gen_jcc1) */ > -static int is_fast_jcc_case(DisasContext *s, int b) > +/* perform a conditional store into register 'reg' according to jump opcode > + value 'b'. In the fast case, T0 is guaranted not to be used. 
*/ > +static inline void gen_setcc1(DisasContext *s, int b, TCGv reg) > { > - int jcc_op; > + int inv, jcc_op, size, cond; > + TCGv t0; > + > + inv = b & 1; > jcc_op = (b >> 1) & 7; > + > switch(s->cc_op) { > - /* we optimize the cmp/jcc case */ > + /* we optimize relational operators for the cmp/jcc case */ > case CC_OP_SUBB: > case CC_OP_SUBW: > case CC_OP_SUBL: > case CC_OP_SUBQ: > - if (jcc_op == JCC_O || jcc_op == JCC_P) > - goto slow_jcc; > - break; > - > - /* some jumps are easy to compute */ > - case CC_OP_ADDB: > - case CC_OP_ADDW: > - case CC_OP_ADDL: > - case CC_OP_ADDQ: > - > - case CC_OP_LOGICB: > - case CC_OP_LOGICW: > - case CC_OP_LOGICL: > - case CC_OP_LOGICQ: > - > - case CC_OP_INCB: > - case CC_OP_INCW: > - case CC_OP_INCL: > - case CC_OP_INCQ: > + size = s->cc_op - CC_OP_SUBB; > + switch(jcc_op) { > + case JCC_BE: > + cond = inv ? TCG_COND_GTU : TCG_COND_LEU; > + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); > + gen_extu(size, cpu_tmp4); > + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); > + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0); > + break; > > - case CC_OP_DECB: > - case CC_OP_DECW: > - case CC_OP_DECL: > - case CC_OP_DECQ: > + case JCC_L: > + cond = inv ? TCG_COND_GE : TCG_COND_LT; > + goto fast_jcc_l; > + case JCC_LE: > + cond = inv ? 
TCG_COND_GT : TCG_COND_LE; > + fast_jcc_l: > + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); > + gen_exts(size, cpu_tmp4); > + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true); > + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0); > + break; > > - case CC_OP_SHLB: > - case CC_OP_SHLW: > - case CC_OP_SHLL: > - case CC_OP_SHLQ: > - if (jcc_op != JCC_Z && jcc_op != JCC_S) > + default: > goto slow_jcc; > + } > break; > + > default: > slow_jcc: > - return 0; > + /* gen_setcc_slow actually generates good code for JC, JZ and JS */ > + gen_setcc_slow(s, jcc_op, reg, inv); > + break; > } > - return 1; > } > > /* generate a conditional jump to label 'l1' according to jump opcode > @@ -2477,28 +2477,7 @@ static inline void gen_jcc(DisasContext *s, int b, > > static void gen_setcc(DisasContext *s, int b) > { > - int inv, jcc_op, l1; > - TCGv t0; > - > - if (is_fast_jcc_case(s, b)) { > - /* nominal case: we use a jump */ > - /* XXX: make it faster by adding new instructions in TCG */ > - t0 = tcg_temp_local_new(); > - tcg_gen_movi_tl(t0, 0); > - l1 = gen_new_label(); > - gen_jcc1(s, b ^ 1, l1); > - tcg_gen_movi_tl(t0, 1); > - gen_set_label(l1); > - tcg_gen_mov_tl(cpu_T[0], t0); > - tcg_temp_free(t0); > - } else { > - /* slow case: it is more efficient not to generate a jump, > - although it is questionnable whether this optimization is > - worth to */ > - inv = b & 1; > - jcc_op = (b >> 1) & 7; > - gen_setcc_slow(s, jcc_op, cpu_T[0], inv); > - } > + gen_setcc1(s, b, cpu_T[0]); > } > > static inline void gen_op_movl_T0_seg(int seg_reg) > -- > 1.7.12.1 > >
On 10/06/2012 05:30 AM, Paolo Bonzini wrote: > +static inline void gen_setcc1(DisasContext *s, int b, TCGv reg) > { > + int inv, jcc_op, size, cond; > + TCGv t0; > + > + inv = b & 1; > jcc_op = (b >> 1) & 7; > + > switch(s->cc_op) { > + /* we optimize relational operators for the cmp/jcc case */ > case CC_OP_SUBB: > case CC_OP_SUBW: > case CC_OP_SUBL: > case CC_OP_SUBQ: > + size = s->cc_op - CC_OP_SUBB; > + switch(jcc_op) { > + case JCC_BE: > + cond = inv ? TCG_COND_GTU : TCG_COND_LEU; > + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); > + gen_extu(size, cpu_tmp4); > + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); > + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0); > + break; I don't think this patch is going in the right direction. In particular, this is going to be largely redundant with gen_jcc1. Instead, cf. the DisasCompare structure now present in target-sparc/, or a similar DisasCompare structure present in my jumbo target-s390x patch set. Here we use common code to generate a comparison, which can then be fed into brcond, setcond, or movcond as desired. I think that this Compare structure should be fed to gen_compute_eflags_* so that a parent gen_condition routine can make use of it for simple conditions like z/nz. At that point, gen_jcc1 and gen_setcc1 become fairly trivial routines. r~
Il 09/10/2012 22:22, Richard Henderson ha scritto: > On 10/06/2012 05:30 AM, Paolo Bonzini wrote: >> +static inline void gen_setcc1(DisasContext *s, int b, TCGv reg) >> { >> + int inv, jcc_op, size, cond; >> + TCGv t0; >> + >> + inv = b & 1; >> jcc_op = (b >> 1) & 7; >> + >> switch(s->cc_op) { >> + /* we optimize relational operators for the cmp/jcc case */ >> case CC_OP_SUBB: >> case CC_OP_SUBW: >> case CC_OP_SUBL: >> case CC_OP_SUBQ: >> + size = s->cc_op - CC_OP_SUBB; >> + switch(jcc_op) { >> + case JCC_BE: >> + cond = inv ? TCG_COND_GTU : TCG_COND_LEU; >> + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); >> + gen_extu(size, cpu_tmp4); >> + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); >> + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0); >> + break; > > I don't think this patch is going in the right direction. In particular, > this is going to be largely redundant with gen_jcc1. Yes, it is. That's something I had started after posting this series, but didn't finish in time for the weekend... :) You can look at a few more changes in the eflags2 branch of my github repo, including: - delaying the actual generation of conditions, so that they can be used in setcond/brcond/movcond - optimization of setle/setl similar to setbe (shift OF onto SF, XOR, mask to SF or SF+ZF, after which you can already do a brcond) There are also TCG changes that add zero-bit tracking to optimize.c to eliminate redundant ext (leading to both better code generation and better copy propagation). Paolo > Instead, c.f. the DisasCompare structure now present in target-sparc/, > or a similar DisasCompare structure present in my jumbo target-s390x > patch set. Here we use common code to generate a comparison, which > can then be fed into brcond, setcond, or movcond as desired. > > I think that this Compare structure should be fed to gen_compute_eflags_* > so that a parent gen_condition routine can make use of them for simple > conditions like z/nz. 
> > At which point gen_jcc1 and gen_setcc1 become fairly trivial routines. > > > r~ > >
diff --git a/target-i386/translate.c b/target-i386/translate.c index 342b9ec..92e8291 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1063,55 +1063,55 @@ static inline void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool in } } -/* return true if setcc_slow is not needed (WARNING: must be kept in - sync with gen_jcc1) */ -static int is_fast_jcc_case(DisasContext *s, int b) +/* perform a conditional store into register 'reg' according to jump opcode + value 'b'. In the fast case, T0 is guaranted not to be used. */ +static inline void gen_setcc1(DisasContext *s, int b, TCGv reg) { - int jcc_op; + int inv, jcc_op, size, cond; + TCGv t0; + + inv = b & 1; jcc_op = (b >> 1) & 7; + switch(s->cc_op) { - /* we optimize the cmp/jcc case */ + /* we optimize relational operators for the cmp/jcc case */ case CC_OP_SUBB: case CC_OP_SUBW: case CC_OP_SUBL: case CC_OP_SUBQ: - if (jcc_op == JCC_O || jcc_op == JCC_P) - goto slow_jcc; - break; - - /* some jumps are easy to compute */ - case CC_OP_ADDB: - case CC_OP_ADDW: - case CC_OP_ADDL: - case CC_OP_ADDQ: - - case CC_OP_LOGICB: - case CC_OP_LOGICW: - case CC_OP_LOGICL: - case CC_OP_LOGICQ: - - case CC_OP_INCB: - case CC_OP_INCW: - case CC_OP_INCL: - case CC_OP_INCQ: + size = s->cc_op - CC_OP_SUBB; + switch(jcc_op) { + case JCC_BE: + cond = inv ? TCG_COND_GTU : TCG_COND_LEU; + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + gen_extu(size, cpu_tmp4); + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0); + break; - case CC_OP_DECB: - case CC_OP_DECW: - case CC_OP_DECL: - case CC_OP_DECQ: + case JCC_L: + cond = inv ? TCG_COND_GE : TCG_COND_LT; + goto fast_jcc_l; + case JCC_LE: + cond = inv ? 
TCG_COND_GT : TCG_COND_LE; + fast_jcc_l: + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + gen_exts(size, cpu_tmp4); + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true); + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0); + break; - case CC_OP_SHLB: - case CC_OP_SHLW: - case CC_OP_SHLL: - case CC_OP_SHLQ: - if (jcc_op != JCC_Z && jcc_op != JCC_S) + default: goto slow_jcc; + } break; + default: slow_jcc: - return 0; + /* gen_setcc_slow actually generates good code for JC, JZ and JS */ + gen_setcc_slow(s, jcc_op, reg, inv); + break; } - return 1; } /* generate a conditional jump to label 'l1' according to jump opcode @@ -2477,28 +2477,7 @@ static inline void gen_jcc(DisasContext *s, int b, static void gen_setcc(DisasContext *s, int b) { - int inv, jcc_op, l1; - TCGv t0; - - if (is_fast_jcc_case(s, b)) { - /* nominal case: we use a jump */ - /* XXX: make it faster by adding new instructions in TCG */ - t0 = tcg_temp_local_new(); - tcg_gen_movi_tl(t0, 0); - l1 = gen_new_label(); - gen_jcc1(s, b ^ 1, l1); - tcg_gen_movi_tl(t0, 1); - gen_set_label(l1); - tcg_gen_mov_tl(cpu_T[0], t0); - tcg_temp_free(t0); - } else { - /* slow case: it is more efficient not to generate a jump, - although it is questionnable whether this optimization is - worth to */ - inv = b & 1; - jcc_op = (b >> 1) & 7; - gen_setcc_slow(s, jcc_op, cpu_T[0], inv); - } + gen_setcc1(s, b, cpu_T[0]); } static inline void gen_op_movl_T0_seg(int seg_reg)
Reconstruct the arguments for complex conditions involving CC_OP_SUBx (BE, L, LE). For the other conditions, compute the result via setcond and gen_setcc_slow (which is not that slow in many cases). Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- target-i386/translate.c | 93 +++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 57 deletions(-)