Patchwork [4/7] tcg-i386: Implement setcond, movcond, setcond2.

login
register
mail settings
Submitter Richard Henderson
Date Dec. 16, 2009, 11:17 p.m.
Message ID <32ca61571b6345f89337bfef816e2606b3995f68.1261012798.git.rth@twiddle.net>
Download mbox | patch
Permalink /patch/41296/
State New
Headers show

Comments

Richard Henderson - Dec. 16, 2009, 11:17 p.m.
An initial cut at conditional moves for the i386 backend.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 elf.h                 |    2 +
 tcg/i386/tcg-target.c |  280 ++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 233 insertions(+), 49 deletions(-)

Patch

diff --git a/elf.h b/elf.h
index 11674d7..c84c8ab 100644
--- a/elf.h
+++ b/elf.h
@@ -243,6 +243,8 @@  typedef struct {
 #define R_386_GOTOFF	9
 #define R_386_GOTPC	10
 #define R_386_NUM	11
+/* Not a dynamic reloc, so not included in R_386_NUM.  Used in TCG.  */
+#define R_386_PC8	23
 
 #define R_MIPS_NONE		0
 #define R_MIPS_16		1
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 972b102..90dbbe9 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -61,6 +61,9 @@  static void patch_reloc(uint8_t *code_ptr, int type,
     case R_386_PC32:
         *(uint32_t *)code_ptr = value - (long)code_ptr;
         break;
+    case R_386_PC8:
+        *(uint8_t *)code_ptr = value - (long)code_ptr;
+        break;
     default:
         tcg_abort();
     }
@@ -305,7 +308,8 @@  static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
         tgen_arithi(s, ARITH_ADD, reg, val, 0);
 }
 
-static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
+/* Use SMALL != 0 to force a short forward branch.  */
+static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
     int32_t val, val1;
     TCGLabel *l = &s->labels[label_index];
@@ -320,6 +324,7 @@  static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
                 tcg_out8(s, 0x70 + opc);
             tcg_out8(s, val1);
         } else {
+            assert (!small);
             if (opc == -1) {
                 tcg_out8(s, 0xe9);
                 tcg_out32(s, val - 5);
@@ -329,6 +334,15 @@  static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
                 tcg_out32(s, val - 6);
             }
         }
+    } else if (small) {
+        if (opc == -1) {
+            tcg_out8(s, 0xeb);
+        } else {
+            tcg_out8(s, 0x0f);
+            tcg_out8(s, 0x70 + opc);
+        }
+        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
+        s->code_ptr += 1;
     } else {
         if (opc == -1) {
             tcg_out8(s, 0xe9);
@@ -341,9 +355,8 @@  static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
     }
 }
 
-static void tcg_out_brcond(TCGContext *s, int cond, 
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index)
+static void tcg_out_cond(TCGContext *s, int cond,
+			 TCGArg arg1, TCGArg arg2, int const_arg2)
 {
     if (const_arg2) {
         if (arg2 == 0) {
@@ -355,71 +368,225 @@  static void tcg_out_brcond(TCGContext *s, int cond,
     } else {
         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
     }
-    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond, 
+                           TCGArg arg1, TCGArg arg2, int const_arg2,
+                           int label_index, int small)
+{
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
 }
 
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s,
-                            const TCGArg *args, const int *const_args)
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+                            const int *const_args, int small)
 {
-    int label_next;
-    label_next = gen_new_label();
-    switch(args[4]) {
+    int label_next = gen_new_label();
+    int label_dest = args[5];
+    int cond = args[4], c1, c2, c3;
+
+    switch (cond) {
     case TCG_COND_EQ:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
-        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
+        c1 = -1, c2 = TCG_COND_NE, c3 = TCG_COND_EQ;
         break;
     case TCG_COND_NE:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
-        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
+        c1 = TCG_COND_NE, c2 = -1, c3 = TCG_COND_NE;
         break;
     case TCG_COND_LT:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
-        break;
-    case TCG_COND_LE:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
-        break;
-    case TCG_COND_GT:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
-        break;
-    case TCG_COND_GE:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
-        break;
     case TCG_COND_LTU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LTU;
         break;
+    case TCG_COND_LE:
     case TCG_COND_LEU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LEU;
         break;
+    case TCG_COND_GT:
     case TCG_COND_GTU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GTU;
         break;
+    case TCG_COND_GE:
     case TCG_COND_GEU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GEU;
         break;
     default:
-        tcg_abort();
+        tcg_abort ();
+    }
+
+    tcg_out_cond(s, cond, args[1], args[3], const_args[3]);
+    if (c1 != -1) {
+        tcg_out_jxx(s, tcg_cond_to_jcc[c1], label_dest, small);
+    }
+    if (c2 != -1) {
+        tcg_out_jxx(s, tcg_cond_to_jcc[c2], label_next, 1);
     }
+    tcg_out_brcond(s, c3, args[0], args[2], const_args[2], label_dest, small);
+
     tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
 }
 
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+                            TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+    int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2));
+
+    if (use_xor)
+        tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, arg0);
+    if (!use_xor)
+        tgen_arithi(s, ARITH_AND, arg0, 0xff, 0);
+}
+
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+                             const int *const_args)
+{
+    int overlapl, overlaph;
+    TCGArg new_args[6];
+    int label_true, label_over;
+
+    overlapl = (args[0] == args[1] || (!const_args[3] && args[0] == args[3]));
+    overlaph = (args[0] == args[2] || (!const_args[4] && args[0] == args[4]));
+    memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+    if (!overlapl && !overlaph) {
+        /* ??? For EQ and NE, and output register in 'q', we could
+           implement this as cmp lows; setb %al; cmp highs; setb %ah;
+           andb %ah, %al; movzbl %al, %eax it's not clear it's worth
+           it though.  */
+
+        /* When possible, clear the destination first and increment in
+           the true case.  This results in smaller code than the
+           general case below.  */
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+        label_over = gen_new_label();
+        new_args[5] = label_over;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    } else {
+        label_true = gen_new_label();
+        label_over = gen_new_label();
+
+        new_args[5] = label_true;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+        tcg_out_jxx(s, JCC_JMP, label_over, 1);
+        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    }
+}
+
+static inline int have_cmov(void)
+{
+#ifdef __i686__
+    /* Compiler options say that cmov is available.  */
+    return 1;
+#else
+    /* ??? Use cpuid or something and figure out what's running.  */
+    return 0;
+#endif
+}
+
+static void tcg_out_movcond(TCGContext *s, const TCGArg *args,
+                            const int *const_args)
+{
+    int vtc, vfc, cond, use_cmov = 0, do_swap = 0;
+    TCGArg d, vt, vf;
+
+    d = args[0];
+    vt = args[3];
+    vf = args[4];
+    vtc = const_args[3];
+    vfc = const_args[4];
+
+    /* ??? The jcc code path below assumes that one mov insn must be skipped.
+       Rather than complicate the code below, make sure to simplify the
+       conditional move here.  */
+    if (vtc == vfc && vt == vf) {
+        if (vtc)
+            tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+        else
+            tcg_out_mov(s, d, vt);
+        return;
+    }
+
+    cond = args[5];
+
+    /* If both arguments are constants, we *could* do all the funny bits that
+       gcc does with sbc, masks, etc.  There's likely no point.  Just use the
+       jcc version in this case.  We also have to be careful about clobbering
+       inputs when trying to move constants into position.  */
+
+    if (have_cmov()) {
+        use_cmov = 1;
+        if (vtc) {
+            if (vfc || d == vf)
+                use_cmov = 0;
+            else
+                do_swap = 1;
+        } else if (d == vt) {
+            if (vfc)
+                use_cmov = 0;
+            else
+                do_swap = 1;
+        }
+    }
+
+    if (!use_cmov) {
+        /* We're going to follow the lead of cmov and set D=VF first,
+           which means inverting the condition upon which we jump.  */
+        cond = tcg_invert_cond(cond);
+
+        /* Don't allow the move we jump over to be a nop.  */
+        do_swap = (!vtc && d == vt);
+    }
+
+    if (do_swap) {
+        TCGArg t;
+        cond = tcg_invert_cond(cond);
+        t = vf, vf = vt, vt = t;
+        t = vfc, vfc = vtc, vtc = t;
+    }
+
+    /* If possible, set D=0 before the compare, so that we can use XOR.  */
+    if (vfc && vf == 0 && d != args[1] && (const_args[2] || d != args[2])) {
+        tcg_out_movi(s, TCG_TYPE_I32, d, vf);
+        vf = d, vfc = 0;
+    }
+
+    tcg_out_cond(s, cond, args[1], args[2], const_args[2]);
+            
+    if (vfc) {
+        /* Force the use of "mov $0, d" to avoid clobbering flags.  */
+        tcg_out8(s, 0xb8 + d);
+        tcg_out32(s, vf);
+    } else {
+        tcg_out_mov(s, d, vf);
+    }
+
+    if (use_cmov) {
+        assert (!vtc);
+        tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT, d, vt);
+    } else {
+        int label_next = gen_new_label();
+
+        tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_next, 1);
+        if (vtc)
+            tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+        else
+            tcg_out_mov(s, d, vt);
+
+        tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+    }
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -913,7 +1080,7 @@  static inline void tcg_out_op(TCGContext *s, int opc,
         }
         break;
     case INDEX_op_br:
-        tcg_out_jxx(s, JCC_JMP, args[0]);
+        tcg_out_jxx(s, JCC_JMP, args[0], 0);
         break;
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
@@ -1044,10 +1211,11 @@  static inline void tcg_out_op(TCGContext *s, int opc,
             tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
     case INDEX_op_brcond_i32:
-        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+                       args[3], 0);
         break;
     case INDEX_op_brcond2_i32:
-        tcg_out_brcond2(s, args, const_args);
+        tcg_out_brcond2(s, args, const_args, 0);
         break;
 
     case INDEX_op_bswap16_i32:
@@ -1080,6 +1248,16 @@  static inline void tcg_out_op(TCGContext *s, int opc,
         tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+        break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args, const_args);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args, const_args);
+        break;
+
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
         break;
@@ -1168,6 +1346,10 @@  static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_ext8u_i32, { "r", "q"} },
     { INDEX_op_ext16u_i32, { "r", "r"} },
 
+    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "ri", "ri" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },