Message ID | 1254730801-5045-1-git-send-email-andre.przywara@amd.com |
---|---|
State | Superseded |
Headers | show |
On Mon, Oct 05, 2009 at 10:20:01AM +0200, Andre Przywara wrote: > lzcnt is a AMD Phenom/Barcelona added instruction returning the > number of leading zero bits in a word. > As this is similar to the "bsr" instruction, reuse the existing > code. There need to be some more changes, though, as lzcnt always > returns a valid value (in opposite to bsr, which has a special > case when the operand is 0). > lzcnt is guarded by the ABM CPUID bit (Fn8000_0001:ECX_5). > > Signed-off-by: Andre Przywara <andre.przywara@amd.com> > --- > target-i386/helper.h | 1 + > target-i386/op_helper.c | 14 ++++++++++++-- > target-i386/translate.c | 37 +++++++++++++++++++++++++------------ > 3 files changed, 38 insertions(+), 14 deletions(-) > > Aurelien, > > this version addresses your comments. > > Thanks for the review (and the other commits)! > Thanks, for the new version. There is still a minor issue I haven't spotted at the first review. See the inline comment. > diff --git a/target-i386/helper.h b/target-i386/helper.h > index ca953f4..6b518ad 100644 > --- a/target-i386/helper.h > +++ b/target-i386/helper.h > @@ -193,6 +193,7 @@ DEF_HELPER_2(fxsave, void, tl, int) > DEF_HELPER_2(fxrstor, void, tl, int) > DEF_HELPER_1(bsf, tl, tl) > DEF_HELPER_1(bsr, tl, tl) > +DEF_HELPER_2(lzcnt, tl, tl, int) > > /* MMX/SSE */ > > diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c > index 26fe612..5eea322 100644 > --- a/target-i386/op_helper.c > +++ b/target-i386/op_helper.c > @@ -5479,11 +5479,14 @@ target_ulong helper_bsf(target_ulong t0) > return count; > } > > -target_ulong helper_bsr(target_ulong t0) > +target_ulong helper_lzcnt(target_ulong t0, int wordsize) > { > int count; > target_ulong res, mask; > - > + > + if (wordsize > 0 && t0 == 0) { > + return wordsize; > + } > res = t0; > count = TARGET_LONG_BITS - 1; > mask = (target_ulong)1 << (TARGET_LONG_BITS - 1); > @@ -5491,9 +5494,16 @@ target_ulong helper_bsr(target_ulong t0) > count--; > res <<= 1; > } > + if (wordsize > 0) { > + 
return wordsize - 1 - count; > + } > return count; > } > > +target_ulong helper_bsr(target_ulong t0) > +{ > + return helper_lzcnt(t0, 0); > +} > > static int compute_all_eflags(void) > { > diff --git a/target-i386/translate.c b/target-i386/translate.c > index e3cb49f..5cbdce1 100644 > --- a/target-i386/translate.c > +++ b/target-i386/translate.c > @@ -6575,22 +6575,35 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) > reg = ((modrm >> 3) & 7) | rex_r; > gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); > gen_extu(ot, cpu_T[0]); > - label1 = gen_new_label(); > - tcg_gen_movi_tl(cpu_cc_dst, 0); > t0 = tcg_temp_local_new(); > tcg_gen_mov_tl(t0, cpu_T[0]); > - tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); > - if (b & 1) { > - gen_helper_bsr(cpu_T[0], t0); > + if ((b & 1) && (prefixes & PREFIX_REPZ) && > + (s->cpuid_ext3_features & CPUID_EXT3_ABM)) { > + switch(ot) { > + case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0, > + tcg_const_i32(16)); break; > + case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0, > + tcg_const_i32(32)); break; > + case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0, > + tcg_const_i32(64)); break; > + } > + gen_op_mov_reg_T0(ot, reg); > } else { > - gen_helper_bsf(cpu_T[0], t0); > + label1 = gen_new_label(); > + tcg_gen_movi_tl(cpu_cc_dst, 0); > + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); > + if (b & 1) { > + gen_helper_bsr(cpu_T[0], t0); > + } else { > + gen_helper_bsf(cpu_T[0], t0); > + } > + gen_op_mov_reg_T0(ot, reg); > + tcg_gen_movi_tl(cpu_cc_dst, 1); > + gen_set_label(label1); > + tcg_gen_discard_tl(cpu_cc_src); > + s->cc_op = CC_OP_LOGICB + ot; > + tcg_temp_free(t0); > } > - gen_op_mov_reg_T0(ot, reg); > - tcg_gen_movi_tl(cpu_cc_dst, 1); > - gen_set_label(label1); > - tcg_gen_discard_tl(cpu_cc_src); > - s->cc_op = CC_OP_LOGICB + ot; > - tcg_temp_free(t0); The tcg_temp_free(t0) is missing in the lzcnt path. Since it is common to both paths, it is probably best not to move it from here. > } > break; > /************************/ > -- > 1.6.1.3 > > >
diff --git a/target-i386/helper.h b/target-i386/helper.h index ca953f4..6b518ad 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -193,6 +193,7 @@ DEF_HELPER_2(fxsave, void, tl, int) DEF_HELPER_2(fxrstor, void, tl, int) DEF_HELPER_1(bsf, tl, tl) DEF_HELPER_1(bsr, tl, tl) +DEF_HELPER_2(lzcnt, tl, tl, int) /* MMX/SSE */ diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c index 26fe612..5eea322 100644 --- a/target-i386/op_helper.c +++ b/target-i386/op_helper.c @@ -5479,11 +5479,14 @@ target_ulong helper_bsf(target_ulong t0) return count; } -target_ulong helper_bsr(target_ulong t0) +target_ulong helper_lzcnt(target_ulong t0, int wordsize) { int count; target_ulong res, mask; - + + if (wordsize > 0 && t0 == 0) { + return wordsize; + } res = t0; count = TARGET_LONG_BITS - 1; mask = (target_ulong)1 << (TARGET_LONG_BITS - 1); @@ -5491,9 +5494,16 @@ target_ulong helper_bsr(target_ulong t0) count--; res <<= 1; } + if (wordsize > 0) { + return wordsize - 1 - count; + } return count; } +target_ulong helper_bsr(target_ulong t0) +{ + return helper_lzcnt(t0, 0); +} static int compute_all_eflags(void) { diff --git a/target-i386/translate.c b/target-i386/translate.c index e3cb49f..5cbdce1 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -6575,22 +6575,35 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) reg = ((modrm >> 3) & 7) | rex_r; gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); gen_extu(ot, cpu_T[0]); - label1 = gen_new_label(); - tcg_gen_movi_tl(cpu_cc_dst, 0); t0 = tcg_temp_local_new(); tcg_gen_mov_tl(t0, cpu_T[0]); - tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); - if (b & 1) { - gen_helper_bsr(cpu_T[0], t0); + if ((b & 1) && (prefixes & PREFIX_REPZ) && + (s->cpuid_ext3_features & CPUID_EXT3_ABM)) { + switch(ot) { + case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0, + tcg_const_i32(16)); break; + case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0, + tcg_const_i32(32)); break; + case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0, 
+ tcg_const_i32(64)); break; + } + gen_op_mov_reg_T0(ot, reg); } else { - gen_helper_bsf(cpu_T[0], t0); + label1 = gen_new_label(); + tcg_gen_movi_tl(cpu_cc_dst, 0); + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); + if (b & 1) { + gen_helper_bsr(cpu_T[0], t0); + } else { + gen_helper_bsf(cpu_T[0], t0); + } + gen_op_mov_reg_T0(ot, reg); + tcg_gen_movi_tl(cpu_cc_dst, 1); + gen_set_label(label1); + tcg_gen_discard_tl(cpu_cc_src); + s->cc_op = CC_OP_LOGICB + ot; + tcg_temp_free(t0); } - gen_op_mov_reg_T0(ot, reg); - tcg_gen_movi_tl(cpu_cc_dst, 1); - gen_set_label(label1); - tcg_gen_discard_tl(cpu_cc_src); - s->cc_op = CC_OP_LOGICB + ot; - tcg_temp_free(t0); } break; /************************/
lzcnt is an instruction added with AMD Phenom/Barcelona that returns the number of leading zero bits in a word. As this is similar to the "bsr" instruction, reuse the existing code. Some further changes are needed, though, as lzcnt always returns a valid value (in contrast to bsr, which has a special case when the operand is 0). lzcnt is guarded by the ABM CPUID bit (Fn8000_0001:ECX_5). Signed-off-by: Andre Przywara <andre.przywara@amd.com> --- target-i386/helper.h | 1 + target-i386/op_helper.c | 14 ++++++++++++-- target-i386/translate.c | 37 +++++++++++++++++++++++++------------ 3 files changed, 38 insertions(+), 14 deletions(-) Aurelien, this version addresses your comments. Thanks for the review (and the other commits)! Regards, Andre.