Patchwork [05/14] i386: move eflags computation closer to gen_op_set_cc_op

login
register
mail settings
Submitter Paolo Bonzini
Date Oct. 6, 2012, 12:30 p.m.
Message ID <1349526621-13939-6-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/189700/
State New
Headers show

Comments

Paolo Bonzini - Oct. 6, 2012, 12:30 p.m.
In some cases this is just simple code movement, ensuring the invariant
that cpu_cc_op matches s->cc_op when calling the helpers.  The next patches
need this because gen_compute_eflags and gen_compute_eflags_c will take
care of setting cpu_cc_op.

Also, for rotates by a variable count, always compute EFLAGS first since
it is needed whenever the count is non-zero, i.e. most of the time.  This
makes it possible to remove some writes of CC_OP_EFLAGS to cpu_cc_op and,
more importantly, removes cases where s->cc_op becomes CC_OP_DYNAMIC.
These are slow and we want to avoid them: CC_OP_EFLAGS is quite efficient
once we have paid the initial cost of computing the flags.

Finally, always follow gen_compute_eflags(cpu_cc_src) by setting s->cc_op
to CC_OP_EFLAGS and discarding cpu_cc_dst.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/translate.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)
Richard Henderson - Oct. 9, 2012, 7:02 p.m.
On 10/06/2012 05:30 AM, Paolo Bonzini wrote:
> In some cases this is just simple code movement, ensuring the invariant
> that cpu_cc_op matches s->cc_op when calling the helpers.  The next patches
> need this because gen_compute_eflags and gen_compute_eflags_c will take
> care of setting cpu_cc_op.
> 
> Also, for shifts, always compute EFLAGS first since it is needed whenever
> the shift is non-zero, i.e. most of the time.  This makes it possible
> to remove some writes of CC_OP_EFLAGS to cpu_cc_op and more importantly
> removes cases where s->cc_op becomes CC_OP_DYNAMIC.  These are slow and
> we want to avoid them: CC_OP_EFLAGS is quite efficient once we paid the
> initial cost of computing the flags.
> 
> Finally, always follow gen_compute_eflags(cpu_cc_src) by setting s->cc_op
> and discarding cpu_cc_dst.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

I was about to quibble with some of this, but I see you've
cleaned up all my quibbles with subsequent patches.

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

Patch

diff --git a/target-i386/translate.c b/target-i386/translate.c
index 38f62eb..0821468 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -1363,6 +1363,7 @@  static void gen_inc(DisasContext *s1, int ot, int d, int c)
         gen_op_ld_T0_A0(ot + s1->mem_index);
     if (s1->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s1->cc_op);
+    gen_compute_eflags_c(cpu_cc_src);
     if (c > 0) {
         tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1);
         s1->cc_op = CC_OP_INCB + ot;
@@ -1374,7 +1375,6 @@  static void gen_inc(DisasContext *s1, int ot, int d, int c)
         gen_op_mov_reg_T0(ot, d);
     else
         gen_op_st_T0_A0(ot + s1->mem_index);
-    gen_compute_eflags_c(cpu_cc_src);
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
@@ -1587,14 +1587,16 @@  static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
         gen_op_mov_reg_v(ot, op1, t0);
     }
     
-    /* update eflags */
+    /* update eflags.  It is needed anyway most of the time, do it always.  */
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
+    gen_compute_eflags(cpu_cc_src);
+    tcg_gen_discard_tl(cpu_cc_dst);
+    s->cc_op = CC_OP_EFLAGS;
 
     label2 = gen_new_label();
     tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
 
-    gen_compute_eflags(cpu_cc_src);
     tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
     tcg_gen_xor_tl(cpu_tmp0, t2, t0);
     tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
@@ -1605,12 +1607,8 @@  static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
     }
     tcg_gen_andi_tl(t0, t0, CC_C);
     tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-    
-    tcg_gen_discard_tl(cpu_cc_dst);
-    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-        
+
     gen_set_label(label2);
-    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 
     tcg_temp_free(t0);
     tcg_temp_free(t1);
@@ -1674,6 +1672,9 @@  static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
             gen_op_set_cc_op(s->cc_op);
 
         gen_compute_eflags(cpu_cc_src);
+        tcg_gen_discard_tl(cpu_cc_dst);
+        s->cc_op = CC_OP_EFLAGS;
+
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
         tcg_gen_xor_tl(cpu_tmp0, t1, t0);
         tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
@@ -1684,10 +1685,6 @@  static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
         }
         tcg_gen_andi_tl(t0, t0, CC_C);
         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-
-        tcg_gen_discard_tl(cpu_cc_dst);
-        tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-        s->cc_op = CC_OP_EFLAGS;
     }
 
     tcg_temp_free(t0);
@@ -1703,6 +1700,9 @@  static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
 
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
+    gen_compute_eflags(cpu_cc_src);
+    tcg_gen_discard_tl(cpu_cc_dst);
+    s->cc_op = CC_OP_EFLAGS;
 
     /* load */
     if (op1 == OR_TMP0)
@@ -1756,11 +1756,7 @@  static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
     tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1);
 
     tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp);
-    tcg_gen_discard_tl(cpu_cc_dst);
-    tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS);
-        
     gen_set_label(label1);
-    s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 }
 
 /* XXX: add faster immediate case */
@@ -6501,10 +6497,12 @@  static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_compute_eflags(cpu_cc_src);
+        tcg_gen_discard_tl(cpu_cc_dst);
+        s->cc_op = CC_OP_EFLAGS;
+
         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
         tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C);
         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]);
-        s->cc_op = CC_OP_EFLAGS;
         break;
     case 0x9f: /* lahf */
         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))