Message ID | 1253313049-25874-2-git-send-email-andre.przywara@amd.com |
---|---|
State | Superseded |
Headers | show |
On Sat, Sep 19, 2009 at 12:30:46AM +0200, Andre Przywara wrote: > lzcnt is an AMD Phenom/Barcelona added instruction returning the > number of leading zero bits in a word. > As this is similar to the "bsr" instruction, reuse the existing > code. There need to be some more changes, though, as lzcnt always > returns a valid value (in opposite to bsr, which has a special > case when the operand is 0). > lzcnt is guarded by the ABM CPUID bit (Fn8000_0001:ECX[5]). While it's probably a good idea to reuse the existing bsr code, I think they should be different helpers. In helper.c, bsr and lzcnt helpers can then call the same function with different arguments. > Signed-off-by: Andre Przywara <andre.przywara@amd.com> > --- > target-i386/helper.h | 2 +- > target-i386/op_helper.c | 11 ++++++++--- > target-i386/translate.c | 37 +++++++++++++++++++++++++------------ > 3 files changed, 34 insertions(+), 16 deletions(-) > > diff --git a/target-i386/helper.h b/target-i386/helper.h > index 68d57b1..38d0708 100644 > --- a/target-i386/helper.h > +++ b/target-i386/helper.h > @@ -191,7 +191,7 @@ DEF_HELPER_2(frstor, void, tl, int) > DEF_HELPER_2(fxsave, void, tl, int) > DEF_HELPER_2(fxrstor, void, tl, int) > DEF_HELPER_1(bsf, tl, tl) > -DEF_HELPER_1(bsr, tl, tl) > +DEF_HELPER_2(bsr, tl, tl, int) > > /* MMX/SSE */ > > diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c > index c3f5af6..9ffda36 100644 > --- a/target-i386/op_helper.c > +++ b/target-i386/op_helper.c > @@ -5457,11 +5457,14 @@ target_ulong helper_bsf(target_ulong t0) > return count; > } > > -target_ulong helper_bsr(target_ulong t0) > +target_ulong helper_bsr(target_ulong t0, int lzcnt) > { > int count; > target_ulong res, mask; > - > + > + if (lzcnt > 0 && t0 == 0) { > + return lzcnt; > + } > res = t0; > count = TARGET_LONG_BITS - 1; > mask = (target_ulong)1 << (TARGET_LONG_BITS - 1); > @@ -5469,10 +5472,12 @@ target_ulong helper_bsr(target_ulong t0) > count--; > res <<= 1; > } > + if (lzcnt > 0) { > + return 
lzcnt - 1 - count; > + } > return count; > } > > - > static int compute_all_eflags(void) > { > return CC_SRC; > diff --git a/target-i386/translate.c b/target-i386/translate.c > index 335fc08..aaa4492 100644 > --- a/target-i386/translate.c > +++ b/target-i386/translate.c > @@ -6538,22 +6538,35 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) > reg = ((modrm >> 3) & 7) | rex_r; > gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); > gen_extu(ot, cpu_T[0]); > - label1 = gen_new_label(); > - tcg_gen_movi_tl(cpu_cc_dst, 0); > t0 = tcg_temp_local_new(); > tcg_gen_mov_tl(t0, cpu_T[0]); > - tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); > - if (b & 1) { > - gen_helper_bsr(cpu_T[0], t0); > + if ((b & 1) && (prefixes & PREFIX_REPZ) && > + (s->cpuid_ext3_features & CPUID_EXT3_ABM)) { > + switch(ot) { > + case OT_WORD: gen_helper_bsr(cpu_T[0], t0, > + tcg_const_i32(16)); break; > + case OT_LONG: gen_helper_bsr(cpu_T[0], t0, > + tcg_const_i32(32)); break; > + case OT_QUAD: gen_helper_bsr(cpu_T[0], t0, > + tcg_const_i32(64)); break; > + } > + gen_op_mov_reg_T0(ot, reg); > } else { > - gen_helper_bsf(cpu_T[0], t0); > + label1 = gen_new_label(); > + tcg_gen_movi_tl(cpu_cc_dst, 0); > + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); > + if (b & 1) { > + gen_helper_bsr(cpu_T[0], t0, tcg_const_i32(0)); > + } else { > + gen_helper_bsf(cpu_T[0], t0); > + } > + gen_op_mov_reg_T0(ot, reg); > + tcg_gen_movi_tl(cpu_cc_dst, 1); > + gen_set_label(label1); > + tcg_gen_discard_tl(cpu_cc_src); > + s->cc_op = CC_OP_LOGICB + ot; > + tcg_temp_free(t0); > } > - gen_op_mov_reg_T0(ot, reg); > - tcg_gen_movi_tl(cpu_cc_dst, 1); > - gen_set_label(label1); > - tcg_gen_discard_tl(cpu_cc_src); > - s->cc_op = CC_OP_LOGICB + ot; > - tcg_temp_free(t0); > } > break; > /************************/ > -- > 1.6.1.3 > > > > >
diff --git a/target-i386/helper.h b/target-i386/helper.h index 68d57b1..38d0708 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -191,7 +191,7 @@ DEF_HELPER_2(frstor, void, tl, int) DEF_HELPER_2(fxsave, void, tl, int) DEF_HELPER_2(fxrstor, void, tl, int) DEF_HELPER_1(bsf, tl, tl) -DEF_HELPER_1(bsr, tl, tl) +DEF_HELPER_2(bsr, tl, tl, int) /* MMX/SSE */ diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c index c3f5af6..9ffda36 100644 --- a/target-i386/op_helper.c +++ b/target-i386/op_helper.c @@ -5457,11 +5457,14 @@ target_ulong helper_bsf(target_ulong t0) return count; } -target_ulong helper_bsr(target_ulong t0) +target_ulong helper_bsr(target_ulong t0, int lzcnt) { int count; target_ulong res, mask; - + + if (lzcnt > 0 && t0 == 0) { + return lzcnt; + } res = t0; count = TARGET_LONG_BITS - 1; mask = (target_ulong)1 << (TARGET_LONG_BITS - 1); @@ -5469,10 +5472,12 @@ target_ulong helper_bsr(target_ulong t0) count--; res <<= 1; } + if (lzcnt > 0) { + return lzcnt - 1 - count; + } return count; } - static int compute_all_eflags(void) { return CC_SRC; diff --git a/target-i386/translate.c b/target-i386/translate.c index 335fc08..aaa4492 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -6538,22 +6538,35 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) reg = ((modrm >> 3) & 7) | rex_r; gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); gen_extu(ot, cpu_T[0]); - label1 = gen_new_label(); - tcg_gen_movi_tl(cpu_cc_dst, 0); t0 = tcg_temp_local_new(); tcg_gen_mov_tl(t0, cpu_T[0]); - tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); - if (b & 1) { - gen_helper_bsr(cpu_T[0], t0); + if ((b & 1) && (prefixes & PREFIX_REPZ) && + (s->cpuid_ext3_features & CPUID_EXT3_ABM)) { + switch(ot) { + case OT_WORD: gen_helper_bsr(cpu_T[0], t0, + tcg_const_i32(16)); break; + case OT_LONG: gen_helper_bsr(cpu_T[0], t0, + tcg_const_i32(32)); break; + case OT_QUAD: gen_helper_bsr(cpu_T[0], t0, + tcg_const_i32(64)); break; + } + 
gen_op_mov_reg_T0(ot, reg); } else { - gen_helper_bsf(cpu_T[0], t0); + label1 = gen_new_label(); + tcg_gen_movi_tl(cpu_cc_dst, 0); + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); + if (b & 1) { + gen_helper_bsr(cpu_T[0], t0, tcg_const_i32(0)); + } else { + gen_helper_bsf(cpu_T[0], t0); + } + gen_op_mov_reg_T0(ot, reg); + tcg_gen_movi_tl(cpu_cc_dst, 1); + gen_set_label(label1); + tcg_gen_discard_tl(cpu_cc_src); + s->cc_op = CC_OP_LOGICB + ot; + tcg_temp_free(t0); } - gen_op_mov_reg_T0(ot, reg); - tcg_gen_movi_tl(cpu_cc_dst, 1); - gen_set_label(label1); - tcg_gen_discard_tl(cpu_cc_src); - s->cc_op = CC_OP_LOGICB + ot; - tcg_temp_free(t0); } break; /************************/
lzcnt is an instruction introduced with AMD Phenom/Barcelona that returns the number of leading zero bits in a word. As this is similar to the "bsr" instruction, reuse the existing code. Some further changes are needed, though, as lzcnt always returns a valid value (as opposed to bsr, which has a special case when the operand is 0). lzcnt is guarded by the ABM CPUID bit (Fn8000_0001:ECX[5]). Signed-off-by: Andre Przywara <andre.przywara@amd.com> --- target-i386/helper.h | 2 +- target-i386/op_helper.c | 11 ++++++++--- target-i386/translate.c | 37 +++++++++++++++++++++++++------------ 3 files changed, 34 insertions(+), 16 deletions(-)