Patchwork [08/14] i386: do not compute eflags multiple times consecutively

login
register
mail settings
Submitter Paolo Bonzini
Date Oct. 6, 2012, 12:30 p.m.
Message ID <1349526621-13939-9-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/189693/
State New
Headers show

Comments

Paolo Bonzini - Oct. 6, 2012, 12:30 p.m.
After calling gen_compute_eflags, leave the computed value in cpu_cc_src
and set cc_op to CC_OP_EFLAGS.  Most calls to gen_compute_eflags will be
removed by the next few patches anyway.

As a result of this change it is more natural to remove the register
argument from gen_compute_eflags and change all the callers.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/translate.c | 73 ++++++++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 37 deletions(-)
Blue Swirl - Oct. 7, 2012, 7:09 p.m.
On Sat, Oct 6, 2012 at 12:30 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> After calling gen_compute_eflags, leave the computed value in cpu_cc_src
> and set cc_op to CC_OP_EFLAGS.  Most calls to gen_compute_eflags will be
> removed by the next few patches anyway.
>
> As a result of this change it is more natural to remove the register
> argument from gen_compute_eflags and change all the callers.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Reviewed-by: Blue Swirl <blauwirbel@gmail.com>

> ---
>  target-i386/translate.c | 73 ++++++++++++++++++++++++-------------------------
>  1 file changed, 36 insertions(+), 37 deletions(-)
>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 8f22119..09512c3 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -834,48 +834,49 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
>  }
>
>  /* compute all eflags to cc_src */
> -static void gen_compute_eflags(DisasContext *s, TCGv reg)
> +static void gen_compute_eflags(DisasContext *s)
>  {
>      if (s->cc_op != CC_OP_DYNAMIC) {
>          gen_op_set_cc_op(s->cc_op);
>      }
> -    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
> -    if (reg == cpu_cc_src) {
> -        tcg_gen_discard_tl(cpu_cc_dst);
> -        s->cc_op = CC_OP_EFLAGS;
> +    if (s->cc_op == CC_OP_EFLAGS) {
> +        return;
>      }
> -    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
> +    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
> +    tcg_gen_discard_tl(cpu_cc_dst);
> +    s->cc_op = CC_OP_EFLAGS;
> +    tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
>  }
>
>  /* compute eflags.P to reg */
>  static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
>  {
> -    gen_compute_eflags(s, reg);
> -    tcg_gen_shri_tl(reg, reg, 2);
> +    gen_compute_eflags(s);
> +    tcg_gen_shri_tl(reg, cpu_cc_src, 2);
>      tcg_gen_andi_tl(reg, reg, 1);
>  }
>
>  /* compute eflags.S to reg */
>  static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
>  {
> -    gen_compute_eflags(s, reg);
> -    tcg_gen_shri_tl(reg, reg, 7);
> +    gen_compute_eflags(s);
> +    tcg_gen_shri_tl(reg, cpu_cc_src, 7);
>      tcg_gen_andi_tl(reg, reg, 1);
>  }
>
>  /* compute eflags.O to reg */
>  static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
>  {
> -    gen_compute_eflags(s, reg);
> -    tcg_gen_shri_tl(reg, reg, 11);
> +    gen_compute_eflags(s);
> +    tcg_gen_shri_tl(reg, cpu_cc_src, 11);
>      tcg_gen_andi_tl(reg, reg, 1);
>  }
>
>  /* compute eflags.Z to reg */
>  static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
>  {
> -    gen_compute_eflags(s, reg);
> -    tcg_gen_shri_tl(reg, reg, 6);
> +    gen_compute_eflags(s);
> +    tcg_gen_shri_tl(reg, cpu_cc_src, 6);
>      tcg_gen_andi_tl(reg, reg, 1);
>  }
>
> @@ -892,9 +893,9 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
>          gen_compute_eflags_z(s, cpu_T[0]);
>          break;
>      case JCC_BE:
> -        gen_compute_eflags(s, cpu_tmp0);
> -        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
> -        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
> +        gen_compute_eflags(s);
> +        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6);
> +        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
>          tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
>          break;
>      case JCC_S:
> @@ -904,18 +905,18 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
>          gen_compute_eflags_p(s, cpu_T[0]);
>          break;
>      case JCC_L:
> -        gen_compute_eflags(s, cpu_tmp0);
> -        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
> -        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
> +        gen_compute_eflags(s);
> +        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
> +        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */
>          tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
>          tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
>          break;
>      default:
>      case JCC_LE:
> -        gen_compute_eflags(s, cpu_tmp0);
> -        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
> -        tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
> -        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
> +        gen_compute_eflags(s);
> +        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
> +        tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */
> +        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */
>          tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
>          tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
>          tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
> @@ -1614,7 +1615,7 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
>      }
>
>      /* update eflags.  It is needed anyway most of the time, do it always.  */
> -    gen_compute_eflags(s, cpu_cc_src);
> +    gen_compute_eflags(s);
>      assert(s->cc_op == CC_OP_EFLAGS);
>
>      label2 = gen_new_label();
> @@ -1691,7 +1692,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
>
>      if (op2 != 0) {
>          /* update eflags */
> -        gen_compute_eflags(s, cpu_cc_src);
> +        gen_compute_eflags(s);
>          assert(s->cc_op == CC_OP_EFLAGS);
>
>          tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
> @@ -1717,9 +1718,7 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
>  {
>      int label1;
>
> -    if (s->cc_op != CC_OP_DYNAMIC)
> -        gen_op_set_cc_op(s->cc_op);
> -    gen_compute_eflags(s, cpu_cc_src);
> +    gen_compute_eflags(s);
>
>      /* load */
>      if (op1 == OR_TMP0)
> @@ -6512,7 +6511,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
>          if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
>              goto illegal_op;
>          gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
> -        gen_compute_eflags(s, cpu_cc_src);
> +        gen_compute_eflags(s);
>          tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
>          tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
>          tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
> @@ -6520,21 +6519,21 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
>      case 0x9f: /* lahf */
>          if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
>              goto illegal_op;
> -        gen_compute_eflags(s, cpu_T[0]);
> +        gen_compute_eflags(s);
>          /* Note: gen_compute_eflags() only gives the condition codes */
> -        tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
> +        tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
>          gen_op_mov_reg_T0(OT_BYTE, R_AH);
>          break;
>      case 0xf5: /* cmc */
> -        gen_compute_eflags(s, cpu_cc_src);
> +        gen_compute_eflags(s);
>          tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
>          break;
>      case 0xf8: /* clc */
> -        gen_compute_eflags(s, cpu_cc_src);
> +        gen_compute_eflags(s);
>          tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
>          break;
>      case 0xf9: /* stc */
> -        gen_compute_eflags(s, cpu_cc_src);
> +        gen_compute_eflags(s);
>          tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
>          break;
>      case 0xfc: /* cld */
> @@ -6889,7 +6888,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
>              case 1: /* loopz */
>                  gen_op_add_reg_im(s->aflag, R_ECX, -1);
>                  gen_op_jz_ecx(s->aflag, l3);
> -                gen_compute_eflags(s, cpu_tmp0);
> +                gen_compute_eflags(s);
>                  tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
>                  if (b == 0) {
>                      tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
> @@ -7431,7 +7430,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
>             } else {
>                  gen_op_mov_reg_v(ot, rm, t0);
>              }
> -            gen_compute_eflags(s, cpu_cc_src);
> +            gen_compute_eflags(s);
>              tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
>              tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
>              tcg_temp_free(t0);
> --
> 1.7.12.1
>
>
>
Richard Henderson - Oct. 9, 2012, 7:14 p.m.
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> +static void gen_compute_eflags(DisasContext *s)
>  {
>      if (s->cc_op != CC_OP_DYNAMIC) {
>          gen_op_set_cc_op(s->cc_op);
>      }
> +    if (s->cc_op == CC_OP_EFLAGS) {
> +        return;
>      }
> +    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
> +    tcg_gen_discard_tl(cpu_cc_dst);
> +    s->cc_op = CC_OP_EFLAGS;
> +    tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
>  }

Can we at this point in the series assert that if s->cc_op == CC_OP_EFLAGS,
then cpu_cc_op has also been assigned CC_OP_EFLAGS?  If so, then we can do

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op != CC_OP_DYNAMIC) {
        gen_op_set_cc_op(s->cc_op);
    }
    ...

As-is it would appear that we get redundant assignments to cpu_cc_op when
calling this routine twice in a row.  And with that helper call in between
we won't be able to eliminate the second assignment.

I'll also note that we'd probably get better code if gen_helper_cc_compute_all
took all of cpu_cc_{op,src,dst} as arguments so that it could be CONST+PURE.
With just that changed I think the redundant assignment to cpu_cc_op would
be eliminated.

All that said, I don't see anything wrong with the patch as-is, and probably
these other things I mention would want to be follow-on patches anyway.

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

Patch

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 8f22119..09512c3 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -834,48 +834,49 @@  static void gen_compute_eflags_c(DisasContext *s, TCGv reg)
 }
 
 /* compute all eflags to cc_src */
-static void gen_compute_eflags(DisasContext *s, TCGv reg)
+static void gen_compute_eflags(DisasContext *s)
 {
     if (s->cc_op != CC_OP_DYNAMIC) {
         gen_op_set_cc_op(s->cc_op);
     }
-    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
-    if (reg == cpu_cc_src) {
-        tcg_gen_discard_tl(cpu_cc_dst);
-        s->cc_op = CC_OP_EFLAGS;
+    if (s->cc_op == CC_OP_EFLAGS) {
+        return;
     }
-    tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
+    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
+    tcg_gen_discard_tl(cpu_cc_dst);
+    s->cc_op = CC_OP_EFLAGS;
+    tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
 }
 
 /* compute eflags.P to reg */
 static void gen_compute_eflags_p(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s, reg);
-    tcg_gen_shri_tl(reg, reg, 2);
+    gen_compute_eflags(s);
+    tcg_gen_shri_tl(reg, cpu_cc_src, 2);
     tcg_gen_andi_tl(reg, reg, 1);
 }
 
 /* compute eflags.S to reg */
 static void gen_compute_eflags_s(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s, reg);
-    tcg_gen_shri_tl(reg, reg, 7);
+    gen_compute_eflags(s);
+    tcg_gen_shri_tl(reg, cpu_cc_src, 7);
     tcg_gen_andi_tl(reg, reg, 1);
 }
 
 /* compute eflags.O to reg */
 static void gen_compute_eflags_o(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s, reg);
-    tcg_gen_shri_tl(reg, reg, 11);
+    gen_compute_eflags(s);
+    tcg_gen_shri_tl(reg, cpu_cc_src, 11);
     tcg_gen_andi_tl(reg, reg, 1);
 }
 
 /* compute eflags.Z to reg */
 static void gen_compute_eflags_z(DisasContext *s, TCGv reg)
 {
-    gen_compute_eflags(s, reg);
-    tcg_gen_shri_tl(reg, reg, 6);
+    gen_compute_eflags(s);
+    tcg_gen_shri_tl(reg, cpu_cc_src, 6);
     tcg_gen_andi_tl(reg, reg, 1);
 }
 
@@ -892,9 +893,9 @@  static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
         gen_compute_eflags_z(s, cpu_T[0]);
         break;
     case JCC_BE:
-        gen_compute_eflags(s, cpu_tmp0);
-        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6);
-        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+        gen_compute_eflags(s);
+        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6);
+        tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_cc_src);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     case JCC_S:
@@ -904,18 +905,18 @@  static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op)
         gen_compute_eflags_p(s, cpu_T[0]);
         break;
     case JCC_L:
-        gen_compute_eflags(s, cpu_tmp0);
-        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
-        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */
+        gen_compute_eflags(s);
+        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
+        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */
         tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
         break;
     default:
     case JCC_LE:
-        gen_compute_eflags(s, cpu_tmp0);
-        tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */
-        tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */
-        tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */
+        gen_compute_eflags(s);
+        tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */
+        tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */
+        tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */
         tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
         tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1);
@@ -1614,7 +1615,7 @@  static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
     }
     
     /* update eflags.  It is needed anyway most of the time, do it always.  */
-    gen_compute_eflags(s, cpu_cc_src);
+    gen_compute_eflags(s);
     assert(s->cc_op == CC_OP_EFLAGS);
 
     label2 = gen_new_label();
@@ -1691,7 +1692,7 @@  static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
 
     if (op2 != 0) {
         /* update eflags */
-        gen_compute_eflags(s, cpu_cc_src);
+        gen_compute_eflags(s);
         assert(s->cc_op == CC_OP_EFLAGS);
 
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
@@ -1717,9 +1718,7 @@  static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
 {
     int label1;
 
-    if (s->cc_op != CC_OP_DYNAMIC)
-        gen_op_set_cc_op(s->cc_op);
-    gen_compute_eflags(s, cpu_cc_src);
+    gen_compute_eflags(s);
 
     /* load */
     if (op1 == OR_TMP0)
@@ -6512,7 +6511,7 @@  static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
         gen_op_mov_TN_reg(OT_BYTE, 0, R_AH);
-        gen_compute_eflags(s, cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
@@ -6520,21 +6519,21 @@  static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0x9f: /* lahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
             goto illegal_op;
-        gen_compute_eflags(s, cpu_T[0]);
+        gen_compute_eflags(s);
         /* Note: gen_compute_eflags() only gives the condition codes */
-        tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02);
+        tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02);
         gen_op_mov_reg_T0(OT_BYTE, R_AH);
         break;
     case 0xf5: /* cmc */
-        gen_compute_eflags(s, cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
         break;
     case 0xf8: /* clc */
-        gen_compute_eflags(s, cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
         break;
     case 0xf9: /* stc */
-        gen_compute_eflags(s, cpu_cc_src);
+        gen_compute_eflags(s);
         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
         break;
     case 0xfc: /* cld */
@@ -6889,7 +6888,7 @@  static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             case 1: /* loopz */
                 gen_op_add_reg_im(s->aflag, R_ECX, -1);
                 gen_op_jz_ecx(s->aflag, l3);
-                gen_compute_eflags(s, cpu_tmp0);
+                gen_compute_eflags(s);
                 tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z);
                 if (b == 0) {
                     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
@@ -7431,7 +7430,7 @@  static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
            } else {
                 gen_op_mov_reg_v(ot, rm, t0);
             }
-            gen_compute_eflags(s, cpu_cc_src);
+            gen_compute_eflags(s);
             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
             tcg_temp_free(t0);