Patchwork [42/57] target-i386: Use CC_SRC2 for ADC and SBB

login
register
mail settings
Submitter Richard Henderson
Date Feb. 19, 2013, 5:40 p.m.
Message ID <1361295631-21316-43-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/221755/
State New
Headers show

Comments

Richard Henderson - Feb. 19, 2013, 5:40 p.m.
Add another slot in ENV (cc_src2) and store two of the three inputs (the
second operand and the carry-in) alongside the result.  This lets us do
less work when carry-out is not needed, and avoids leaving CC_OP
unpredictable (CC_OP_DYNAMIC) after translating these insns.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-i386/cc_helper.c          | 40 ++++++++++----------
 target-i386/cc_helper_template.h | 26 +++++++------
 target-i386/cpu.h                | 10 +++--
 target-i386/helper.h             |  4 +-
 target-i386/translate.c          | 80 ++++++++++++++++------------------------
 5 files changed, 75 insertions(+), 85 deletions(-)

Patch

diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index a5d8181..218a9b5 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -75,7 +75,8 @@  const uint8_t parity_table[256] = {
 
 #endif
 
-target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
+target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
+                                   target_ulong src2, int op)
 {
     switch (op) {
     default: /* should never happen */
@@ -99,11 +100,11 @@  target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
         return compute_all_addl(dst, src1);
 
     case CC_OP_ADCB:
-        return compute_all_adcb(dst, src1);
+        return compute_all_adcb(dst, src1, src2);
     case CC_OP_ADCW:
-        return compute_all_adcw(dst, src1);
+        return compute_all_adcw(dst, src1, src2);
     case CC_OP_ADCL:
-        return compute_all_adcl(dst, src1);
+        return compute_all_adcl(dst, src1, src2);
 
     case CC_OP_SUBB:
         return compute_all_subb(dst, src1);
@@ -113,11 +114,11 @@  target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
         return compute_all_subl(dst, src1);
 
     case CC_OP_SBBB:
-        return compute_all_sbbb(dst, src1);
+        return compute_all_sbbb(dst, src1, src2);
     case CC_OP_SBBW:
-        return compute_all_sbbw(dst, src1);
+        return compute_all_sbbw(dst, src1, src2);
     case CC_OP_SBBL:
-        return compute_all_sbbl(dst, src1);
+        return compute_all_sbbl(dst, src1, src2);
 
     case CC_OP_LOGICB:
         return compute_all_logicb(dst, src1);
@@ -160,11 +161,11 @@  target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
     case CC_OP_ADDQ:
         return compute_all_addq(dst, src1);
     case CC_OP_ADCQ:
-        return compute_all_adcq(dst, src1);
+        return compute_all_adcq(dst, src1, src2);
     case CC_OP_SUBQ:
         return compute_all_subq(dst, src1);
     case CC_OP_SBBQ:
-        return compute_all_sbbq(dst, src1);
+        return compute_all_sbbq(dst, src1, src2);
     case CC_OP_LOGICQ:
         return compute_all_logicq(dst, src1);
     case CC_OP_INCQ:
@@ -181,10 +182,11 @@  target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op)
 
 uint32_t cpu_cc_compute_all(CPUX86State *env, int op)
 {
-    return helper_cc_compute_all(CC_DST, CC_SRC, op);
+    return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, op);
 }
 
-target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
+target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
+                                 target_ulong src2, int op)
 {
     switch (op) {
     default: /* should never happen */
@@ -225,11 +227,11 @@  target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
         return compute_c_addl(dst, src1);
 
     case CC_OP_ADCB:
-        return compute_c_adcb(dst, src1);
+        return compute_c_adcb(dst, src1, src2);
     case CC_OP_ADCW:
-        return compute_c_adcw(dst, src1);
+        return compute_c_adcw(dst, src1, src2);
     case CC_OP_ADCL:
-        return compute_c_adcl(dst, src1);
+        return compute_c_adcl(dst, src1, src2);
 
     case CC_OP_SUBB:
         return compute_c_subb(dst, src1);
@@ -239,11 +241,11 @@  target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
         return compute_c_subl(dst, src1);
 
     case CC_OP_SBBB:
-        return compute_c_sbbb(dst, src1);
+        return compute_c_sbbb(dst, src1, src2);
     case CC_OP_SBBW:
-        return compute_c_sbbw(dst, src1);
+        return compute_c_sbbw(dst, src1, src2);
     case CC_OP_SBBL:
-        return compute_c_sbbl(dst, src1);
+        return compute_c_sbbl(dst, src1, src2);
 
     case CC_OP_SHLB:
         return compute_c_shlb(dst, src1);
@@ -256,11 +258,11 @@  target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op)
     case CC_OP_ADDQ:
         return compute_c_addq(dst, src1);
     case CC_OP_ADCQ:
-        return compute_c_adcq(dst, src1);
+        return compute_c_adcq(dst, src1, src2);
     case CC_OP_SUBQ:
         return compute_c_subq(dst, src1);
     case CC_OP_SBBQ:
-        return compute_c_sbbq(dst, src1);
+        return compute_c_sbbq(dst, src1, src2);
     case CC_OP_SHLQ:
         return compute_c_shlq(dst, src1);
 #endif
diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h
index 522b462..87f47d2 100644
--- a/target-i386/cc_helper_template.h
+++ b/target-i386/cc_helper_template.h
@@ -58,12 +58,13 @@  static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
     return dst < src1;
 }
 
-static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
+                                         DATA_TYPE src3)
 {
     int cf, pf, af, zf, sf, of;
-    DATA_TYPE src2 = dst - src1 - 1;
+    DATA_TYPE src2 = dst - src1 - src3;
 
-    cf = dst <= src1;
+    cf = (src3 ? dst <= src1 : dst < src1);
     pf = parity_table[(uint8_t)dst];
     af = (dst ^ src1 ^ src2) & 0x10;
     zf = (dst == 0) << 6;
@@ -72,9 +73,10 @@  static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1,
+                                       DATA_TYPE src3)
 {
-    return dst <= src1;
+    return src3 ? dst <= src1 : dst < src1;
 }
 
 static int glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
@@ -98,12 +100,13 @@  static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
     return src1 < src2;
 }
 
-static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
+static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
+                                         DATA_TYPE src3)
 {
     int cf, pf, af, zf, sf, of;
-    DATA_TYPE src1 = dst + src2 + 1;
+    DATA_TYPE src1 = dst + src2 + src3;
 
-    cf = src1 <= src2;
+    cf = (src3 ? src1 <= src2 : src1 < src2);
     pf = parity_table[(uint8_t)dst];
     af = (dst ^ src1 ^ src2) & 0x10;
     zf = (dst == 0) << 6;
@@ -112,11 +115,12 @@  static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
     return cf | pf | af | zf | sf | of;
 }
 
-static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2)
+static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2,
+                                       DATA_TYPE src3)
 {
-    DATA_TYPE src1 = dst + src2 + 1;
+    DATA_TYPE src1 = dst + src2 + src3;
 
-    return src1 <= src2;
+    return (src3 ? src1 <= src2 : src1 < src2);
 }
 
 static int glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 8c4c605..1fa9dc8 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -725,8 +725,9 @@  typedef struct CPUX86State {
                         stored elsewhere */
 
     /* emulator internal eflags handling */
-    target_ulong cc_src;
     target_ulong cc_dst;
+    target_ulong cc_src;
+    target_ulong cc_src2;
     uint32_t cc_op;
     int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
     uint32_t hflags; /* TB flags, see HF_xxx constants. These flags
@@ -1116,9 +1117,10 @@  static inline int cpu_mmu_index (CPUX86State *env)
 #define EIP (env->eip)
 #define DF  (env->df)
 
-#define CC_SRC (env->cc_src)
-#define CC_DST (env->cc_dst)
-#define CC_OP  (env->cc_op)
+#define CC_DST  (env->cc_dst)
+#define CC_SRC  (env->cc_src)
+#define CC_SRC2 (env->cc_src2)
+#define CC_OP   (env->cc_op)
 
 /* n must be a constant to be efficient */
 static inline target_long lshift(target_long x, int n)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 901ff73..4c46ab1 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -1,7 +1,7 @@ 
 #include "exec/def-helper.h"
 
-DEF_HELPER_FLAGS_3(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
-DEF_HELPER_FLAGS_3(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
+DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
+DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
 
 DEF_HELPER_0(lock, void)
 DEF_HELPER_0(unlock, void)
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 5235aff..f667f93 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -61,7 +61,7 @@ 
 /* global register indexes */
 static TCGv_ptr cpu_env;
 static TCGv cpu_A0;
-static TCGv cpu_cc_src, cpu_cc_dst, cpu_cc_srcT;
+static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
 /* local temps */
@@ -188,18 +188,19 @@  enum {
 enum {
     USES_CC_DST  = 1,
     USES_CC_SRC  = 2,
-    USES_CC_SRCT = 4,
+    USES_CC_SRC2 = 4,
+    USES_CC_SRCT = 8,
 };
 
 /* Bit set if the global variable is live after setting CC_OP to X.  */
 static const uint8_t cc_op_live[CC_OP_NB] = {
-    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_EFLAGS] = USES_CC_SRC,
     [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
-    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
-    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
     [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
@@ -223,6 +224,9 @@  static void set_cc_op(DisasContext *s, CCOp op)
     if (dead & USES_CC_SRC) {
         tcg_gen_discard_tl(cpu_cc_src);
     }
+    if (dead & USES_CC_SRC2) {
+        tcg_gen_discard_tl(cpu_cc_src2);
+    }
     if (dead & USES_CC_SRCT) {
         tcg_gen_discard_tl(cpu_cc_srcT);
     }
@@ -867,6 +871,13 @@  static void gen_op_update2_cc(void)
     tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
 }
 
+static void gen_op_update3_cc(TCGv reg)
+{
+    tcg_gen_mov_tl(cpu_cc_src2, reg);
+    tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
+    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+}
+
 static inline void gen_op_testl_T0_T1_cc(void)
 {
     tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]);
@@ -882,7 +893,7 @@  static void gen_op_update_neg_cc(void)
 /* compute all eflags to cc_src */
 static void gen_compute_eflags(DisasContext *s)
 {
-    TCGv zero, dst, src1;
+    TCGv zero, dst, src1, src2;
     int live, dead;
 
     if (s->cc_op == CC_OP_EFLAGS) {
@@ -892,10 +903,11 @@  static void gen_compute_eflags(DisasContext *s)
     TCGV_UNUSED(zero);
     dst = cpu_cc_dst;
     src1 = cpu_cc_src;
+    src2 = cpu_cc_src2;
 
     /* Take care to not read values that are not live.  */
     live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
-    dead = live ^ (USES_CC_DST | USES_CC_SRC);
+    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
     if (dead) {
         zero = tcg_const_tl(0);
         if (dead & USES_CC_DST) {
@@ -904,10 +916,13 @@  static void gen_compute_eflags(DisasContext *s)
         if (dead & USES_CC_SRC) {
             src1 = zero;
         }
+        if (dead & USES_CC_SRC2) {
+            src2 = zero;
+        }
     }
 
     gen_update_cc_op(s);
-    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, cpu_cc_op);
+    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
     set_cc_op(s, CC_OP_EFLAGS);
 
     if (dead) {
@@ -951,30 +966,6 @@  static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                              .reg2 = t1, .mask = -1, .use_reg2 = true };
 
-    case CC_OP_SBBB ... CC_OP_SBBQ:
-        /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */
-        size = s->cc_op - CC_OP_SBBB;
-        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
-        if (TCGV_EQUAL(t1, reg) && TCGV_EQUAL(reg, cpu_cc_src)) {
-            tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src);
-            t1 = cpu_tmp0;
-        }
-
-        tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src);
-        tcg_gen_addi_tl(reg, reg, 1);
-        gen_extu(size, reg);
-        t0 = reg;
-        goto adc_sbb;
-
-    case CC_OP_ADCB ... CC_OP_ADCQ:
-        /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */
-        size = s->cc_op - CC_OP_ADCB;
-        t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false);
-        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
-    adc_sbb:
-        return (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0,
-                             .reg2 = t1, .mask = -1, .use_reg2 = true };
-
     case CC_OP_LOGICB ... CC_OP_LOGICQ:
         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
 
@@ -1004,7 +995,8 @@  static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
        /* The need to compute only C from CC_OP_DYNAMIC is important
           in efficiently implementing e.g. INC at the start of a TB.  */
        gen_update_cc_op(s);
-       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src, cpu_cc_op);
+       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
+                               cpu_cc_src2, cpu_cc_op);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                             .mask = -1, .no_setcond = true };
     }
@@ -1442,18 +1434,10 @@  static void gen_op(DisasContext *s1, int op, int ot, int d)
             gen_op_mov_reg_T0(ot, d);
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
-        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
-        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
-        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot);
-        set_cc_op(s1, CC_OP_DYNAMIC);
+        gen_op_update3_cc(cpu_tmp4);
+        set_cc_op(s1, CC_OP_ADCB + ot);
         break;
     case OP_SBBL:
-        /*
-         * No need to store cpu_cc_srcT, because it is used only
-         * when the cc_op is known.
-         */
         gen_compute_eflags_c(s1, cpu_tmp4);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
         tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4);
@@ -1461,12 +1445,8 @@  static void gen_op(DisasContext *s1, int op, int ot, int d)
             gen_op_mov_reg_T0(ot, d);
         else
             gen_op_st_T0_A0(ot + s1->mem_index);
-        tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]);
-        tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
-        tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4);
-        tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2);
-        tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot);
-        set_cc_op(s1, CC_OP_DYNAMIC);
+        gen_op_update3_cc(cpu_tmp4);
+        set_cc_op(s1, CC_OP_SBBB + ot);
         break;
     case OP_ADDL:
         gen_op_addl_T0_T1();
@@ -7788,6 +7768,8 @@  void optimize_flags_init(void)
                                     "cc_dst");
     cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
                                     "cc_src");
+    cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
+                                     "cc_src2");
 
 #ifdef TARGET_X86_64
     cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,