Patchwork [1/4,v2] TCG x86: implement lzcnt emulation

login
register
mail settings
Submitter Andre Przywara
Date Oct. 5, 2009, 8:20 a.m.
Message ID <1254730801-5045-1-git-send-email-andre.przywara@amd.com>
Download mbox | patch
Permalink /patch/34946/
State Superseded
Headers show

Comments

Andre Przywara - Oct. 5, 2009, 8:20 a.m.
lzcnt is an instruction added with AMD Phenom/Barcelona that returns the
number of leading zero bits in a word.
As this is similar to the "bsr" instruction, reuse the existing
code. Some more changes are needed, though, as lzcnt always
returns a valid value (as opposed to bsr, which has a special
case when the operand is 0).
lzcnt is guarded by the ABM CPUID bit (Fn8000_0001:ECX_5).

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
 target-i386/helper.h    |    1 +
 target-i386/op_helper.c |   14 ++++++++++++--
 target-i386/translate.c |   37 +++++++++++++++++++++++++------------
 3 files changed, 38 insertions(+), 14 deletions(-)

Aurelien,

this version addresses your comments.

Thanks for the review (and the other commits)!

Regards,
Andre.
Aurelien Jarno - Oct. 5, 2009, 1:57 p.m.
On Mon, Oct 05, 2009 at 10:20:01AM +0200, Andre Przywara wrote:
> lzcnt is a AMD Phenom/Barcelona added instruction returning the
> number of leading zero bits in a word.
> As this is similar to the "bsr" instruction, reuse the existing
> code. There need to be some more changes, though, as lzcnt always
> returns a valid value (in opposite to bsr, which has a special
> case when the operand is 0).
> lzcnt is guarded by the ABM CPUID bit (Fn8000_0001:ECX_5).
> 
> Signed-off-by: Andre Przywara <andre.przywara@amd.com>
> ---
>  target-i386/helper.h    |    1 +
>  target-i386/op_helper.c |   14 ++++++++++++--
>  target-i386/translate.c |   37 +++++++++++++++++++++++++------------
>  3 files changed, 38 insertions(+), 14 deletions(-)
> 
> Aurelien,
> 
> this version addresses your comments.
> 
> Thanks for the review (and the other commits)!
> 

Thanks for the new version. There is still a minor issue I didn't
spot in the first review. See the inline comment.

> diff --git a/target-i386/helper.h b/target-i386/helper.h
> index ca953f4..6b518ad 100644
> --- a/target-i386/helper.h
> +++ b/target-i386/helper.h
> @@ -193,6 +193,7 @@ DEF_HELPER_2(fxsave, void, tl, int)
>  DEF_HELPER_2(fxrstor, void, tl, int)
>  DEF_HELPER_1(bsf, tl, tl)
>  DEF_HELPER_1(bsr, tl, tl)
> +DEF_HELPER_2(lzcnt, tl, tl, int)
>  
>  /* MMX/SSE */
>  
> diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
> index 26fe612..5eea322 100644
> --- a/target-i386/op_helper.c
> +++ b/target-i386/op_helper.c
> @@ -5479,11 +5479,14 @@ target_ulong helper_bsf(target_ulong t0)
>      return count;
>  }
>  
> -target_ulong helper_bsr(target_ulong t0)
> +target_ulong helper_lzcnt(target_ulong t0, int wordsize)
>  {
>      int count;
>      target_ulong res, mask;
> -    
> +
> +    if (wordsize > 0 && t0 == 0) {
> +        return wordsize;
> +    }
>      res = t0;
>      count = TARGET_LONG_BITS - 1;
>      mask = (target_ulong)1 << (TARGET_LONG_BITS - 1);
> @@ -5491,9 +5494,16 @@ target_ulong helper_bsr(target_ulong t0)
>          count--;
>          res <<= 1;
>      }
> +    if (wordsize > 0) {
> +        return wordsize - 1 - count;
> +    }
>      return count;
>  }
>  
> +target_ulong helper_bsr(target_ulong t0)
> +{
> +	return helper_lzcnt(t0, 0);
> +}
>  
>  static int compute_all_eflags(void)
>  {
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index e3cb49f..5cbdce1 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -6575,22 +6575,35 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
>              reg = ((modrm >> 3) & 7) | rex_r;
>              gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
>              gen_extu(ot, cpu_T[0]);
> -            label1 = gen_new_label();
> -            tcg_gen_movi_tl(cpu_cc_dst, 0);
>              t0 = tcg_temp_local_new();
>              tcg_gen_mov_tl(t0, cpu_T[0]);
> -            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
> -            if (b & 1) {
> -                gen_helper_bsr(cpu_T[0], t0);
> +            if ((b & 1) && (prefixes & PREFIX_REPZ) &&
> +                (s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
> +                switch(ot) {
> +                case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0,
> +                    tcg_const_i32(16)); break;
> +                case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0,
> +                    tcg_const_i32(32)); break;
> +                case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0,
> +                    tcg_const_i32(64)); break;
> +                }
> +                gen_op_mov_reg_T0(ot, reg);
>              } else {
> -                gen_helper_bsf(cpu_T[0], t0);
> +                label1 = gen_new_label();
> +                tcg_gen_movi_tl(cpu_cc_dst, 0);
> +                tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
> +                if (b & 1) {
> +                    gen_helper_bsr(cpu_T[0], t0);
> +                } else {
> +                    gen_helper_bsf(cpu_T[0], t0);
> +                }
> +                gen_op_mov_reg_T0(ot, reg);
> +                tcg_gen_movi_tl(cpu_cc_dst, 1);
> +                gen_set_label(label1);
> +                tcg_gen_discard_tl(cpu_cc_src);
> +                s->cc_op = CC_OP_LOGICB + ot;
> +                tcg_temp_free(t0);
>              }
> -            gen_op_mov_reg_T0(ot, reg);
> -            tcg_gen_movi_tl(cpu_cc_dst, 1);
> -            gen_set_label(label1);
> -            tcg_gen_discard_tl(cpu_cc_src);
> -            s->cc_op = CC_OP_LOGICB + ot;
> -            tcg_temp_free(t0);

The tcg_temp_free(t0) is missing in the lzcnt path. As it is common to
both paths, it is probably best not to move it from here.

>          }
>          break;
>          /************************/
> -- 
> 1.6.1.3
> 
> 
>

Patch

diff --git a/target-i386/helper.h b/target-i386/helper.h
index ca953f4..6b518ad 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -193,6 +193,7 @@  DEF_HELPER_2(fxsave, void, tl, int)
 DEF_HELPER_2(fxrstor, void, tl, int)
 DEF_HELPER_1(bsf, tl, tl)
 DEF_HELPER_1(bsr, tl, tl)
+DEF_HELPER_2(lzcnt, tl, tl, int)
 
 /* MMX/SSE */
 
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index 26fe612..5eea322 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -5479,11 +5479,14 @@  target_ulong helper_bsf(target_ulong t0)
     return count;
 }
 
-target_ulong helper_bsr(target_ulong t0)
+target_ulong helper_lzcnt(target_ulong t0, int wordsize)
 {
     int count;
     target_ulong res, mask;
-    
+
+    if (wordsize > 0 && t0 == 0) {
+        return wordsize;
+    }
     res = t0;
     count = TARGET_LONG_BITS - 1;
     mask = (target_ulong)1 << (TARGET_LONG_BITS - 1);
@@ -5491,9 +5494,16 @@  target_ulong helper_bsr(target_ulong t0)
         count--;
         res <<= 1;
     }
+    if (wordsize > 0) {
+        return wordsize - 1 - count;
+    }
     return count;
 }
 
+target_ulong helper_bsr(target_ulong t0)
+{
+	return helper_lzcnt(t0, 0);
+}
 
 static int compute_all_eflags(void)
 {
diff --git a/target-i386/translate.c b/target-i386/translate.c
index e3cb49f..5cbdce1 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -6575,22 +6575,35 @@  static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             reg = ((modrm >> 3) & 7) | rex_r;
             gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
             gen_extu(ot, cpu_T[0]);
-            label1 = gen_new_label();
-            tcg_gen_movi_tl(cpu_cc_dst, 0);
             t0 = tcg_temp_local_new();
             tcg_gen_mov_tl(t0, cpu_T[0]);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
-            if (b & 1) {
-                gen_helper_bsr(cpu_T[0], t0);
+            if ((b & 1) && (prefixes & PREFIX_REPZ) &&
+                (s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
+                switch(ot) {
+                case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0,
+                    tcg_const_i32(16)); break;
+                case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0,
+                    tcg_const_i32(32)); break;
+                case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0,
+                    tcg_const_i32(64)); break;
+                }
+                gen_op_mov_reg_T0(ot, reg);
             } else {
-                gen_helper_bsf(cpu_T[0], t0);
+                label1 = gen_new_label();
+                tcg_gen_movi_tl(cpu_cc_dst, 0);
+                tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
+                if (b & 1) {
+                    gen_helper_bsr(cpu_T[0], t0);
+                } else {
+                    gen_helper_bsf(cpu_T[0], t0);
+                }
+                gen_op_mov_reg_T0(ot, reg);
+                tcg_gen_movi_tl(cpu_cc_dst, 1);
+                gen_set_label(label1);
+                tcg_gen_discard_tl(cpu_cc_src);
+                s->cc_op = CC_OP_LOGICB + ot;
+                tcg_temp_free(t0);
             }
-            gen_op_mov_reg_T0(ot, reg);
-            tcg_gen_movi_tl(cpu_cc_dst, 1);
-            gen_set_label(label1);
-            tcg_gen_discard_tl(cpu_cc_src);
-            s->cc_op = CC_OP_LOGICB + ot;
-            tcg_temp_free(t0);
         }
         break;
         /************************/