diff mbox

target-i386: Preserve the Z bit for bt/bts/btr/btc

Message ID 1397076986-23562-1-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson April 9, 2014, 8:56 p.m. UTC
Older Intel manuals (pre-2010) describe Z as undefined, but AMD and
newer Intel manuals describe Z as unchanged.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-i386/translate.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

---
Clemens, your patch fails to fix flags computation for bts/btr/btc,
which should be done similarly to bt.  

And to answer your question, no, QEMU does not make any assumptions
about undefined flags.  We often set them to zero, just because that
is easier than any other setting.


r~

Comments

Clemens Kolbitsch April 19, 2014, 10:40 p.m. UTC | #1
Great, thanks for the feedback and the fix Richard!


On Wed, Apr 9, 2014 at 1:56 PM, Richard Henderson <rth@twiddle.net> wrote:

> Older Intel manuals (pre-2010) describe Z as undefined, but AMD and
> newer Intel manuals describe Z as unchanged.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target-i386/translate.c | 40 +++++++++++++++++++++++++++++++---------
>  1 file changed, 31 insertions(+), 9 deletions(-)
>
> ---
> Clemens, your patch fails to fix flags computation for bts/btr/btc,
> which should be done similarly to bt.
>
> And to answer your question, no, QEMU does not make any assumptions
> about undefined flags.  We often set them to zero, just because that
> is easier than any other setting.
>
>
> r~
>
>
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 02625e3..032b0fd 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -6708,41 +6708,63 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          }
>      bt_op:
>          tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
> +        tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
>          switch(op) {
>          case 0:
> -            tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]);
> -            tcg_gen_movi_tl(cpu_cc_dst, 0);
>              break;
>          case 1:
> -            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
>              tcg_gen_movi_tl(cpu_tmp0, 1);
>              tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
>              tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
>              break;
>          case 2:
> -            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
>              tcg_gen_movi_tl(cpu_tmp0, 1);
>              tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
> -            tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
> -            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
> +            tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
>              break;
>          default:
>          case 3:
> -            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
>              tcg_gen_movi_tl(cpu_tmp0, 1);
>              tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
>              tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
>              break;
>          }
> -        set_cc_op(s, CC_OP_SARB + ot);
>          if (op != 0) {
>              if (mod != 3) {
>                  gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
>              } else {
>                  gen_op_mov_reg_v(ot, rm, cpu_T[0]);
>              }
> +        }
> +
> +        /* Delay all CC updates until after the store above.  Note that
> +           C is the result of the test, Z is unchanged, and the others
> +           are all undefined.  */
> +        switch (s->cc_op) {
> +        case CC_OP_MULB ... CC_OP_MULQ:
> +        case CC_OP_ADDB ... CC_OP_ADDQ:
> +        case CC_OP_ADCB ... CC_OP_ADCQ:
> +        case CC_OP_SUBB ... CC_OP_SUBQ:
> +        case CC_OP_SBBB ... CC_OP_SBBQ:
> +        case CC_OP_LOGICB ... CC_OP_LOGICQ:
> +        case CC_OP_INCB ... CC_OP_INCQ:
> +        case CC_OP_DECB ... CC_OP_DECQ:
> +        case CC_OP_SHLB ... CC_OP_SHLQ:
> +        case CC_OP_SARB ... CC_OP_SARQ:
> +        case CC_OP_BMILGB ... CC_OP_BMILGQ:
> +            /* Z was going to be computed from the non-zero status of
> CC_DST.
> +               We can get that same Z value (and the new C value) by
> leaving
> +               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
> +               same width.  */
>              tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
> -            tcg_gen_movi_tl(cpu_cc_dst, 0);
> +            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
> +            break;
> +        default:
> +            /* Otherwise, generate EFLAGS and replace the C bit.  */
> +            gen_compute_eflags(s);
> +            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
> +                               ctz32(CC_C), 1);
> +            break;
>          }
>          break;
>      case 0x1bc: /* bsf / tzcnt */
> --
> 1.9.0
>
>
Paolo Bonzini April 20, 2014, 3:12 a.m. UTC | #2
Il 09/04/2014 16:56, Richard Henderson ha scritto:
> Older Intel manuals (pre-2010) describe Z as undefined, but AMD and
> newer Intel manuals describe Z as unchanged.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  target-i386/translate.c | 40 +++++++++++++++++++++++++++++++---------
>  1 file changed, 31 insertions(+), 9 deletions(-)
>
> ---
> Clemens, your patch fails to fix flags computation for bts/btr/btc,
> which should be done similarly to bt.
>
> And to answer your question, no, QEMU does not make any assumptions
> about undefined flags.  We often set them to zero, just because that
> is easier than any other setting.
>
>
> r~
>
>
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 02625e3..032b0fd 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -6708,41 +6708,63 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
>          }
>      bt_op:
>          tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
> +        tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
>          switch(op) {
>          case 0:
> -            tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]);
> -            tcg_gen_movi_tl(cpu_cc_dst, 0);
>              break;
>          case 1:
> -            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
>              tcg_gen_movi_tl(cpu_tmp0, 1);
>              tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
>              tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
>              break;
>          case 2:
> -            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
>              tcg_gen_movi_tl(cpu_tmp0, 1);
>              tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
> -            tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
> -            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
> +            tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
>              break;
>          default:
>          case 3:
> -            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
>              tcg_gen_movi_tl(cpu_tmp0, 1);
>              tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
>              tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
>              break;
>          }
> -        set_cc_op(s, CC_OP_SARB + ot);
>          if (op != 0) {
>              if (mod != 3) {
>                  gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
>              } else {
>                  gen_op_mov_reg_v(ot, rm, cpu_T[0]);
>              }
> +        }
> +
> +        /* Delay all CC updates until after the store above.  Note that
> +           C is the result of the test, Z is unchanged, and the others
> +           are all undefined.  */
> +        switch (s->cc_op) {
> +        case CC_OP_MULB ... CC_OP_MULQ:
> +        case CC_OP_ADDB ... CC_OP_ADDQ:
> +        case CC_OP_ADCB ... CC_OP_ADCQ:
> +        case CC_OP_SUBB ... CC_OP_SUBQ:
> +        case CC_OP_SBBB ... CC_OP_SBBQ:
> +        case CC_OP_LOGICB ... CC_OP_LOGICQ:
> +        case CC_OP_INCB ... CC_OP_INCQ:
> +        case CC_OP_DECB ... CC_OP_DECQ:
> +        case CC_OP_SHLB ... CC_OP_SHLQ:
> +        case CC_OP_SARB ... CC_OP_SARQ:
> +        case CC_OP_BMILGB ... CC_OP_BMILGQ:
> +            /* Z was going to be computed from the non-zero status of CC_DST.
> +               We can get that same Z value (and the new C value) by leaving
> +               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
> +               same width.  */
>              tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
> -            tcg_gen_movi_tl(cpu_cc_dst, 0);
> +            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
> +            break;
> +        default:
> +            /* Otherwise, generate EFLAGS and replace the C bit.  */
> +            gen_compute_eflags(s);
> +            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
> +                               ctz32(CC_C), 1);
> +            break;
>          }
>          break;
>      case 0x1bc: /* bsf / tzcnt */
>

Cc: qemu-stable@nongnu.org
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

Hmm, actually the comments aren't following the common style of 
asterisks-on-every-line.  Just fix it up without posting v2.

Paolo
diff mbox

Patch

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 02625e3..032b0fd 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -6708,41 +6708,63 @@  static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         }
     bt_op:
         tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
+        tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
         switch(op) {
         case 0:
-            tcg_gen_shr_tl(cpu_cc_src, cpu_T[0], cpu_T[1]);
-            tcg_gen_movi_tl(cpu_cc_dst, 0);
             break;
         case 1:
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
             tcg_gen_movi_tl(cpu_tmp0, 1);
             tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
             tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
             break;
         case 2:
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
             tcg_gen_movi_tl(cpu_tmp0, 1);
             tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
-            tcg_gen_not_tl(cpu_tmp0, cpu_tmp0);
-            tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+            tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
             break;
         default:
         case 3:
-            tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
             tcg_gen_movi_tl(cpu_tmp0, 1);
             tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T[1]);
             tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
             break;
         }
-        set_cc_op(s, CC_OP_SARB + ot);
         if (op != 0) {
             if (mod != 3) {
                 gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
             } else {
                 gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             }
+        }
+
+        /* Delay all CC updates until after the store above.  Note that
+           C is the result of the test, Z is unchanged, and the others
+           are all undefined.  */
+        switch (s->cc_op) {
+        case CC_OP_MULB ... CC_OP_MULQ:
+        case CC_OP_ADDB ... CC_OP_ADDQ:
+        case CC_OP_ADCB ... CC_OP_ADCQ:
+        case CC_OP_SUBB ... CC_OP_SUBQ:
+        case CC_OP_SBBB ... CC_OP_SBBQ:
+        case CC_OP_LOGICB ... CC_OP_LOGICQ:
+        case CC_OP_INCB ... CC_OP_INCQ:
+        case CC_OP_DECB ... CC_OP_DECQ:
+        case CC_OP_SHLB ... CC_OP_SHLQ:
+        case CC_OP_SARB ... CC_OP_SARQ:
+        case CC_OP_BMILGB ... CC_OP_BMILGQ:
+            /* Z was going to be computed from the non-zero status of CC_DST.
+               We can get that same Z value (and the new C value) by leaving
+               CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
+               same width.  */
             tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4);
-            tcg_gen_movi_tl(cpu_cc_dst, 0);
+            set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
+            break;
+        default:
+            /* Otherwise, generate EFLAGS and replace the C bit.  */
+            gen_compute_eflags(s);
+            tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, cpu_tmp4,
+                               ctz32(CC_C), 1);
+            break;
         }
         break;
     case 0x1bc: /* bsf / tzcnt */