diff mbox

[2/6] TCG: clean up i386 and x86_64

Message ID CAAu8pHtfF0pObb7bOeS4YEByidhdimLn0BecZHFfVz5JkfnJUg@mail.gmail.com
State New
Headers show

Commit Message

Blue Swirl Feb. 13, 2012, 8:14 p.m. UTC
Remove 64-bit code from the i386 backend and 32-bit code from the x86_64 backend.

On i386, remove the now-useless OP_32_64, LOWREGMASK and P_REX* macros and
the rexw parameters.

On x86_64, use TCG_REG_Rxx instead of TCG_REG_Exx.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 tcg/i386/tcg-target.c   |  675 ++++++++++-------------------------------------
 tcg/i386/tcg-target.h   |   52 +----
 tcg/x86_64/tcg-target.c |  627 ++++++++------------------------------------
 tcg/x86_64/tcg-target.h |   43 +---
 4 files changed, 258 insertions(+), 1139 deletions(-)
diff mbox

Patch

From 3f6c5fab1aade0ca293de4b6e11432010bf500f0 Mon Sep 17 00:00:00 2001
Message-Id: <3f6c5fab1aade0ca293de4b6e11432010bf500f0.1329162956.git.blauwirbel@gmail.com>
In-Reply-To: <1e6fa6e3ceb74e8c5daf6edb9b8298f871997d6e.1329162956.git.blauwirbel@gmail.com>
References: <1e6fa6e3ceb74e8c5daf6edb9b8298f871997d6e.1329162956.git.blauwirbel@gmail.com>
From: Blue Swirl <blauwirbel@gmail.com>
Date: Sat, 4 Feb 2012 14:43:27 +0000
Subject: [PATCH 2/6] TCG: clean up i386 and x86_64

Remove 64-bit code from the i386 backend and 32-bit code from the x86_64 backend.

On i386, remove the now-useless OP_32_64, LOWREGMASK and P_REX* macros and
the rexw parameters.

On x86_64, use TCG_REG_Rxx instead of TCG_REG_Exx.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 tcg/i386/tcg-target.c   |  675 ++++++++++-------------------------------------
 tcg/i386/tcg-target.h   |   52 +----
 tcg/x86_64/tcg-target.c |  627 ++++++++------------------------------------
 tcg/x86_64/tcg-target.h |   43 +---
 4 files changed, 258 insertions(+), 1139 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index dc81572..eac9a4c 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -24,33 +24,11 @@ 
 
 #ifndef NDEBUG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-#if TCG_TARGET_REG_BITS == 64
-    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
-    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
-#else
     "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
-#endif
 };
 #endif
 
 static const int tcg_target_reg_alloc_order[] = {
-#if TCG_TARGET_REG_BITS == 64
-    TCG_REG_RBP,
-    TCG_REG_RBX,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    TCG_REG_R14,
-    TCG_REG_R15,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R9,
-    TCG_REG_R8,
-    TCG_REG_RCX,
-    TCG_REG_RDX,
-    TCG_REG_RSI,
-    TCG_REG_RDI,
-    TCG_REG_RAX,
-#else
     TCG_REG_EBX,
     TCG_REG_ESI,
     TCG_REG_EDI,
@@ -58,29 +36,17 @@  static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_ECX,
     TCG_REG_EDX,
     TCG_REG_EAX,
-#endif
 };
 
 static const int tcg_target_call_iarg_regs[] = {
-#if TCG_TARGET_REG_BITS == 64
-    TCG_REG_RDI,
-    TCG_REG_RSI,
-    TCG_REG_RDX,
-    TCG_REG_RCX,
-    TCG_REG_R8,
-    TCG_REG_R9,
-#else
     TCG_REG_EAX,
     TCG_REG_EDX,
     TCG_REG_ECX
-#endif
 };
 
 static const int tcg_target_call_oarg_regs[] = {
     TCG_REG_EAX,
-#if TCG_TARGET_REG_BITS == 32
     TCG_REG_EDX
-#endif
 };
 
 static uint8_t *tb_ret_addr;
@@ -112,10 +78,6 @@  static void patch_reloc(uint8_t *code_ptr, int type,
 /* maximum number of register used for input function arguments */
 static inline int tcg_target_get_call_iarg_regs_count(int flags)
 {
-    if (TCG_TARGET_REG_BITS == 64) {
-        return 6;
-    }
-
     flags &= TCG_CALL_TYPE_MASK;
     switch(flags) {
     case TCG_CALL_TYPE_STD:
@@ -162,11 +124,7 @@  static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         break;
     case 'q':
         ct->ct |= TCG_CT_REG;
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_regset_set32(ct->u.regs, 0, 0xffff);
-        } else {
-            tcg_regset_set32(ct->u.regs, 0, 0xf);
-        }
+        tcg_regset_set32(ct->u.regs, 0, 0xf);
         break;
     case 'Q':
         ct->ct |= TCG_CT_REG;
@@ -174,25 +132,15 @@  static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         break;
     case 'r':
         ct->ct |= TCG_CT_REG;
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_regset_set32(ct->u.regs, 0, 0xffff);
-        } else {
-            tcg_regset_set32(ct->u.regs, 0, 0xff);
-        }
+        tcg_regset_set32(ct->u.regs, 0, 0xff);
         break;
 
         /* qemu_ld/st address constraint */
     case 'L':
         ct->ct |= TCG_CT_REG;
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_regset_set32(ct->u.regs, 0, 0xffff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
-        } else {
-            tcg_regset_set32(ct->u.regs, 0, 0xff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
-        }
+        tcg_regset_set32(ct->u.regs, 0, 0xff);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
         break;
 
     case 'e':
@@ -227,25 +175,8 @@  static inline int tcg_target_const_match(tcg_target_long val,
     return 0;
 }
 
-#if TCG_TARGET_REG_BITS == 64
-# define LOWREGMASK(x)	((x) & 7)
-#else
-# define LOWREGMASK(x)	(x)
-#endif
-
 #define P_EXT		0x100		/* 0x0f opcode prefix */
 #define P_DATA16	0x200		/* 0x66 opcode prefix */
-#if TCG_TARGET_REG_BITS == 64
-# define P_ADDR32	0x400		/* 0x67 opcode prefix */
-# define P_REXW		0x800		/* Set REX.W = 1 */
-# define P_REXB_R	0x1000		/* REG field as byte register */
-# define P_REXB_RM	0x2000		/* R/M field as byte register */
-#else
-# define P_ADDR32	0
-# define P_REXW		0
-# define P_REXB_R	0
-# define P_REXB_RM	0
-#endif
 
 #define OPC_ARITH_EvIz	(0x81)
 #define OPC_ARITH_EvIb	(0x83)
@@ -271,7 +202,7 @@  static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_MOVL_Iv     (0xb8)
 #define OPC_MOVSBL	(0xbe | P_EXT)
 #define OPC_MOVSWL	(0xbf | P_EXT)
-#define OPC_MOVSLQ	(0x63 | P_REXW)
+#define OPC_MOVSLQ	(0x63)
 #define OPC_MOVZBL	(0xb6 | P_EXT)
 #define OPC_MOVZWL	(0xb7 | P_EXT)
 #define OPC_POP_r32	(0x58)
@@ -279,7 +210,7 @@  static inline int tcg_target_const_match(tcg_target_long val,
 #define OPC_PUSH_Iv	(0x68)
 #define OPC_PUSH_Ib	(0x6a)
 #define OPC_RET		(0xc3)
-#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
+#define OPC_SETCC	(0x90 | P_EXT) /* ... plus cc */
 #define OPC_SHIFT_1	(0xd1)
 #define OPC_SHIFT_Ib	(0xc1)
 #define OPC_SHIFT_cl	(0xd3)
@@ -353,44 +284,6 @@  static const uint8_t tcg_cond_to_jcc[10] = {
     [TCG_COND_GTU] = JCC_JA,
 };
 
-#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
-{
-    int rex;
-
-    if (opc & P_DATA16) {
-        /* We should never be asking for both 16 and 64-bit operation.  */
-        assert((opc & P_REXW) == 0);
-        tcg_out8(s, 0x66);
-    }
-    if (opc & P_ADDR32) {
-        tcg_out8(s, 0x67);
-    }
-
-    rex = 0;
-    rex |= (opc & P_REXW) >> 8;		/* REX.W */
-    rex |= (r & 8) >> 1;		/* REX.R */
-    rex |= (x & 8) >> 2;		/* REX.X */
-    rex |= (rm & 8) >> 3;		/* REX.B */
-
-    /* P_REXB_{R,RM} indicates that the given register is the low byte.
-       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
-       as otherwise the encoding indicates %[abcd]h.  Note that the values
-       that are ORed in merely indicate that the REX byte must be present;
-       those bits get discarded in output.  */
-    rex |= opc & (r >= 4 ? P_REXB_R : 0);
-    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
-
-    if (rex) {
-        tcg_out8(s, (uint8_t)(rex | 0x40));
-    }
-
-    if (opc & P_EXT) {
-        tcg_out8(s, 0x0f);
-    }
-    tcg_out8(s, opc);
-}
-#else
 static void tcg_out_opc(TCGContext *s, int opc)
 {
     if (opc & P_DATA16) {
@@ -405,12 +298,11 @@  static void tcg_out_opc(TCGContext *s, int opc)
    the 32-bit compilation paths.  This method works with all versions of gcc,
    whereas relying on optimization may not be able to exclude them.  */
 #define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
-#endif
 
 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
 {
     tcg_out_opc(s, opc, r, rm, 0);
-    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+    tcg_out8(s, 0xc0 | (r << 3) | rm);
 }
 
 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
@@ -425,45 +317,18 @@  static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
     int mod, len;
 
     if (index < 0 && rm < 0) {
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* Try for a rip-relative addressing mode.  This has replaced
-               the 32-bit-mode absolute addressing encoding.  */
-            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
-            tcg_target_long disp = offset - pc;
-            if (disp == (int32_t)disp) {
-                tcg_out_opc(s, opc, r, 0, 0);
-                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
-                tcg_out32(s, disp);
-                return;
-            }
-
-            /* Try for an absolute address encoding.  This requires the
-               use of the MODRM+SIB encoding and is therefore larger than
-               rip-relative addressing.  */
-            if (offset == (int32_t)offset) {
-                tcg_out_opc(s, opc, r, 0, 0);
-                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
-                tcg_out8(s, (4 << 3) | 5);
-                tcg_out32(s, offset);
-                return;
-            }
-
-            /* ??? The memory isn't directly addressable.  */
-            tcg_abort();
-        } else {
-            /* Absolute address.  */
-            tcg_out_opc(s, opc, r, 0, 0);
-            tcg_out8(s, (r << 3) | 5);
-            tcg_out32(s, offset);
-            return;
-        }
+        /* Absolute address.  */
+        tcg_out_opc(s, opc, r, 0, 0);
+        tcg_out8(s, (r << 3) | 5);
+        tcg_out32(s, offset);
+        return;
     }
 
     /* Find the length of the immediate addend.  Note that the encoding
        that would be used for (%ebp) indicates absolute addressing.  */
     if (rm < 0) {
         mod = 0, len = 4, rm = 5;
-    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
+    } else if (offset == 0 && rm != TCG_REG_EBP) {
         mod = 0, len = 0;
     } else if (offset == (int8_t)offset) {
         mod = 0x40, len = 1;
@@ -473,10 +338,10 @@  static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
 
     /* Use a single byte MODRM format if possible.  Note that the encoding
        that would be used for %esp is the escape to the two byte form.  */
-    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
+    if (index < 0 && rm != TCG_REG_ESP) {
         /* Single byte MODRM format.  */
         tcg_out_opc(s, opc, r, rm, 0);
-        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+        tcg_out8(s, mod | (r << 3) | rm);
     } else {
         /* Two byte MODRM+SIB format.  */
 
@@ -490,8 +355,8 @@  static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
         }
 
         tcg_out_opc(s, opc, r, rm, index);
-        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
-        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
+        tcg_out8(s, mod | (r << 3) | 4);
+        tcg_out8(s, (shift << 6) | (index << 3) | rm);
     }
 
     if (len == 1) {
@@ -522,7 +387,7 @@  static inline void tcg_out_mov(TCGContext *s, TCGType type,
                                TCGReg ret, TCGReg arg)
 {
     if (arg != ret) {
-        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+        int opc = OPC_MOVL_GvEv;
         tcg_out_modrm(s, opc, ret, arg);
     }
 }
@@ -534,13 +399,13 @@  static void tcg_out_movi(TCGContext *s, TCGType type,
         tgen_arithr(s, ARITH_XOR, ret, ret);
         return;
     } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
-        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
+        tcg_out_opc(s, OPC_MOVL_Iv + ret, 0, ret, 0);
         tcg_out32(s, arg);
     } else if (arg == (int32_t)arg) {
-        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
+        tcg_out_modrm(s, OPC_MOVL_EvIz, 0, ret);
         tcg_out32(s, arg);
     } else {
-        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
+        tcg_out_opc(s, OPC_MOVL_Iv + ret, 0, ret, 0);
         tcg_out32(s, arg);
         tcg_out32(s, arg >> 31 >> 1);
     }
@@ -561,25 +426,25 @@  static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
 
 static inline void tcg_out_push(TCGContext *s, int reg)
 {
-    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
+    tcg_out_opc(s, OPC_PUSH_r32 + reg, 0, reg, 0);
 }
 
 static inline void tcg_out_pop(TCGContext *s, int reg)
 {
-    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
+    tcg_out_opc(s, OPC_POP_r32 + reg, 0, reg, 0);
 }
 
 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                               TCGReg arg1, tcg_target_long arg2)
 {
-    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+    int opc = OPC_MOVL_GvEv;
     tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
 }
 
 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                               TCGReg arg1, tcg_target_long arg2)
 {
-    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
+    int opc = OPC_MOVL_EvGv;
     tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
 }
 
@@ -599,7 +464,7 @@  static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
 
 static inline void tcg_out_bswap32(TCGContext *s, int reg)
 {
-    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
+    tcg_out_opc(s, OPC_BSWAP + reg, 0, reg, 0);
 }
 
 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
@@ -610,15 +475,15 @@  static inline void tcg_out_rolw_8(TCGContext *s, int reg)
 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
 {
     /* movzbl */
-    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
-    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
+    assert(src < 4);
+    tcg_out_modrm(s, OPC_MOVZBL, dest, src);
 }
 
-static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
+static void tcg_out_ext8s(TCGContext *s, int dest, int src)
 {
     /* movsbl */
-    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
-    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
+    assert(src < 4);
+    tcg_out_modrm(s, OPC_MOVSBL, dest, src);
 }
 
 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
@@ -627,10 +492,10 @@  static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
     tcg_out_modrm(s, OPC_MOVZWL, dest, src);
 }
 
-static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
+static inline void tcg_out_ext16s(TCGContext *s, int dest, int src)
 {
     /* movsw[lq] */
-    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
+    tcg_out_modrm(s, OPC_MOVSWL, dest, src);
 }
 
 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
@@ -646,47 +511,24 @@  static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
 
 static inline void tcg_out_bswap64(TCGContext *s, int reg)
 {
-    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
+    tcg_out_opc(s, OPC_BSWAP + reg, 0, reg, 0);
 }
 
 static void tgen_arithi(TCGContext *s, int c, int r0,
                         tcg_target_long val, int cf)
 {
-    int rexw = 0;
-
-    if (TCG_TARGET_REG_BITS == 64) {
-        rexw = c & -8;
-        c &= 7;
-    }
-
     /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
        partial flags update stalls on Pentium4 and are not recommended
        by current Intel optimization manuals.  */
     if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
         int is_inc = (c == ARITH_ADD) ^ (val < 0);
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* The single-byte increment encodings are re-tasked as the
-               REX prefixes.  Use the MODRM encoding.  */
-            tcg_out_modrm(s, OPC_GRP5 + rexw,
-                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
-        } else {
-            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
-        }
+
+        tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
         return;
     }
 
     if (c == ARITH_AND) {
-        if (TCG_TARGET_REG_BITS == 64) {
-            if (val == 0xffffffffu) {
-                tcg_out_ext32u(s, r0, r0);
-                return;
-            }
-            if (val == (uint32_t)val) {
-                /* AND with no high bits set can use a 32-bit operation.  */
-                rexw = 0;
-            }
-        }
-        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
+        if (val == 0xffu && r0 < 4) {
             tcg_out_ext8u(s, r0, r0);
             return;
         }
@@ -697,23 +539,19 @@  static void tgen_arithi(TCGContext *s, int c, int r0,
     }
 
     if (val == (int8_t)val) {
-        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
+        tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
         tcg_out8(s, val);
         return;
     }
-    if (rexw == 0 || val == (int32_t)val) {
-        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
-        tcg_out32(s, val);
-        return;
-    }
-
-    tcg_abort();
+    tcg_out_modrm(s, OPC_ARITH_EvIz, c, r0);
+    tcg_out32(s, val);
+    return;
 }
 
 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
 {
     if (val != 0) {
-        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
+        tgen_arithi(s, ARITH_ADD, reg, val, 0);
     }
 }
 
@@ -765,17 +603,17 @@  static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 }
 
 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
-                        int const_arg2, int rexw)
+                        int const_arg2)
 {
     if (const_arg2) {
         if (arg2 == 0) {
             /* test r, r */
-            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
+            tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
         } else {
-            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
+            tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
         }
     } else {
-        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
+        tgen_arithr(s, ARITH_CMP, arg1, arg2);
     }
 }
 
@@ -783,19 +621,10 @@  static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                              TCGArg arg1, TCGArg arg2, int const_arg2,
                              int label_index, int small)
 {
-    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
+    tcg_out_cmp(s, arg1, arg2, const_arg2);
     tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
 }
 
-#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
-                             TCGArg arg1, TCGArg arg2, int const_arg2,
-                             int label_index, int small)
-{
-    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
-    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
-}
-#else
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
@@ -877,25 +706,15 @@  static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
     }
     tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
 }
-#endif
 
 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                               TCGArg arg1, TCGArg arg2, int const_arg2)
 {
-    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
+    tcg_out_cmp(s, arg1, arg2, const_arg2);
     tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
     tcg_out_ext8u(s, dest, dest);
 }
 
-#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
-                              TCGArg arg1, TCGArg arg2, int const_arg2)
-{
-    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
-    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
-    tcg_out_ext8u(s, dest, dest);
-}
-#else
 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                              const int *const_args)
 {
@@ -937,20 +756,13 @@  static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
         tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
     }
 }
-#endif
 
 static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
 {
     tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
 
-    if (disp == (int32_t)disp) {
-        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
-        tcg_out32(s, disp);
-    } else {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
-        tcg_out_modrm(s, OPC_GRP5,
-                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
-    }
+    tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
+    tcg_out32(s, disp);
 }
 
 static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
@@ -1012,30 +824,24 @@  static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
     const int r0 = tcg_target_call_iarg_regs[0];
     const int r1 = tcg_target_call_iarg_regs[1];
     TCGType type = TCG_TYPE_I32;
-    int rexw = 0;
-
-    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
-        type = TCG_TYPE_I64;
-        rexw = P_REXW;
-    }
 
     tcg_out_mov(s, type, r1, addrlo);
     tcg_out_mov(s, type, r0, addrlo);
 
-    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
+    tcg_out_shifti(s, SHIFT_SHR, r1,
                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tgen_arithi(s, ARITH_AND + rexw, r0,
+    tgen_arithi(s, ARITH_AND, r0,
                 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
-    tgen_arithi(s, ARITH_AND + rexw, r1,
+    tgen_arithi(s, ARITH_AND, r1,
                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
-    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
+    tcg_out_modrm_sib_offset(s, OPC_LEA, r1, TCG_AREG0, r1, 0,
                              offsetof(CPUState, tlb_table[mem_index][0])
                              + which);
 
     /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
 
     tcg_out_mov(s, type, r0, addrlo);
 
@@ -1044,7 +850,7 @@  static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
     label_ptr[0] = s->code_ptr;
     s->code_ptr++;
 
-    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+    if (TARGET_LONG_BITS == 64) {
         /* cmp 4(r1), addrhi */
         tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
 
@@ -1057,7 +863,7 @@  static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
     /* TLB Hit.  */
 
     /* add addend(r1), r0 */
-    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
                          offsetof(CPUTLBEntry, addend) - which);
 }
 #endif
@@ -1075,7 +881,7 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
         tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
         break;
     case 0 | 4:
-        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVSBL, datalo, base, ofs);
         break;
     case 1:
         tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
@@ -1087,9 +893,9 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
         if (bswap) {
             tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
             tcg_out_rolw_8(s, datalo);
-            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
+            tcg_out_modrm(s, OPC_MOVSWL, datalo, datalo);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVSWL, datalo, base, ofs);
         }
         break;
     case 2:
@@ -1098,40 +904,23 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
             tcg_out_bswap32(s, datalo);
         }
         break;
-#if TCG_TARGET_REG_BITS == 64
-    case 2 | 4:
+    case 3:
         if (bswap) {
+            int t = datalo;
+
+            datalo = datahi;
+            datahi = t;
+        }
+        if (base != datalo) {
             tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
-            tcg_out_bswap32(s, datalo);
-            tcg_out_ext32s(s, datalo, datalo);
+            tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
+            tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
         }
-        break;
-#endif
-    case 3:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
-            if (bswap) {
-                tcg_out_bswap64(s, datalo);
-            }
-        } else {
-            if (bswap) {
-                int t = datalo;
-                datalo = datahi;
-                datahi = t;
-            }
-            if (base != datalo) {
-                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
-                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
-            } else {
-                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
-                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
-            }
-            if (bswap) {
-                tcg_out_bswap32(s, datalo);
-                tcg_out_bswap32(s, datahi);
-            }
+        if (bswap) {
+            tcg_out_bswap32(s, datalo);
+            tcg_out_bswap32(s, datahi);
         }
         break;
     default:
@@ -1154,13 +943,13 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     data_reg = args[0];
     addrlo_idx = 1;
-    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+    if (opc == 3) {
         data_reg2 = args[1];
         addrlo_idx = 2;
     }
 
 #if defined(CONFIG_SOFTMMU)
-    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
+    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS == 64)];
     s_bits = opc & 3;
 
     tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
@@ -1179,14 +968,14 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     /* label1: */
     *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
-    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+    if (TARGET_LONG_BITS == 64) {
         *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
     }
 
     /* XXX: move that code at the end of the TB */
     /* The first argument is already loaded with addrlo.  */
     arg_idx = 1;
-    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+    if (TARGET_LONG_BITS == 64) {
         tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
                     args[addrlo_idx + 1]);
     }
@@ -1196,10 +985,10 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
 
     switch(opc) {
     case 0 | 4:
-        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
+        tcg_out_ext8s(s, data_reg, TCG_REG_EAX);
         break;
     case 1 | 4:
-        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
+        tcg_out_ext16s(s, data_reg, TCG_REG_EAX);
         break;
     case 0:
         tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
@@ -1210,15 +999,8 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     case 2:
         tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
         break;
-#if TCG_TARGET_REG_BITS == 64
-    case 2 | 4:
-        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
-        break;
-#endif
     case 3:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
-        } else if (data_reg == TCG_REG_EDX) {
+        if (data_reg == TCG_REG_EDX) {
             /* xchg %edx, %eax */
             tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
             tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
@@ -1238,20 +1020,6 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         int32_t offset = GUEST_BASE;
         int base = args[addrlo_idx];
 
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* ??? We assume all operations have left us with register
-               contents that are zero extended.  So far this appears to
-               be true.  If we want to enforce this, we can either do
-               an explicit zero-extension here, or (if GUEST_BASE == 0)
-               use the ADDR32 prefix.  For now, do nothing.  */
-
-            if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
-                base = TCG_REG_RDI, offset = 0;
-            }
-        }
-
         tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
     }
 #endif
@@ -1273,7 +1041,7 @@  static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
 
     switch (sizeop) {
     case 0:
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVB_EvGv, datalo, base, ofs);
         break;
     case 1:
         if (bswap) {
@@ -1292,14 +1060,7 @@  static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
         tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
         break;
     case 3:
-        if (TCG_TARGET_REG_BITS == 64) {
-            if (bswap) {
-                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
-                tcg_out_bswap64(s, scratch);
-                datalo = scratch;
-            }
-            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
-        } else if (bswap) {
+        if (bswap) {
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
             tcg_out_bswap32(s, scratch);
             tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
@@ -1329,13 +1090,13 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 
     data_reg = args[0];
     addrlo_idx = 1;
-    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
+    if (opc == 3) {
         data_reg2 = args[1];
         addrlo_idx = 2;
     }
 
 #if defined(CONFIG_SOFTMMU)
-    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
+    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS == 64)];
     s_bits = opc;
 
     tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
@@ -1354,17 +1115,12 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 
     /* label1: */
     *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
-    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+    if (TARGET_LONG_BITS == 64) {
         *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
     }
 
     /* XXX: move that code at the end of the TB */
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                    TCG_REG_RSI, data_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
-        stack_adjust = 0;
-    } else if (TARGET_LONG_BITS == 32) {
+    if (TARGET_LONG_BITS == 32) {
         tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
         if (opc == 3) {
             tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
@@ -1415,20 +1171,6 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         int32_t offset = GUEST_BASE;
         int base = args[addrlo_idx];
 
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* ??? We assume all operations have left us with register
-               contents that are zero extended.  So far this appears to
-               be true.  If we want to enforce this, we can either do
-               an explicit zero-extension here, or (if GUEST_BASE == 0)
-               use the ADDR32 prefix.  For now, do nothing.  */
-
-            if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
-                base = TCG_REG_RDI, offset = 0;
-            }
-        }
-
         tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
     }
 #endif
@@ -1437,17 +1179,7 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                               const TCGArg *args, const int *const_args)
 {
-    int c, rexw = 0;
-
-#if TCG_TARGET_REG_BITS == 64
-# define OP_32_64(x) \
-        case glue(glue(INDEX_op_, x), _i64): \
-            rexw = P_REXW; /* FALLTHRU */    \
-        case glue(glue(INDEX_op_, x), _i32)
-#else
-# define OP_32_64(x) \
-        case glue(glue(INDEX_op_, x), _i32)
-#endif
+    int c;
 
     switch(opc) {
     case INDEX_op_exit_tb:
@@ -1489,43 +1221,35 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
         break;
-    OP_32_64(ld8u):
-        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
+    case INDEX_op_ld8u_i32:
         tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
         break;
-    OP_32_64(ld8s):
-        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
+    case INDEX_op_ld8s_i32:
+        tcg_out_modrm_offset(s, OPC_MOVSBL, args[0], args[1], args[2]);
         break;
-    OP_32_64(ld16u):
-        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
+    case INDEX_op_ld16u_i32:
         tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
         break;
-    OP_32_64(ld16s):
-        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
+    case INDEX_op_ld16s_i32:
+        tcg_out_modrm_offset(s, OPC_MOVSWL, args[0], args[1], args[2]);
         break;
-#if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_ld32u_i64:
-#endif
     case INDEX_op_ld_i32:
         tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
         break;
 
-    OP_32_64(st8):
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+    case INDEX_op_st8_i32:
+        tcg_out_modrm_offset(s, OPC_MOVB_EvGv,
                              args[0], args[1], args[2]);
         break;
-    OP_32_64(st16):
+    case INDEX_op_st16_i32:
         tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                              args[0], args[1], args[2]);
         break;
-#if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_st32_i64:
-#endif
     case INDEX_op_st_i32:
         tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
         break;
 
-    OP_32_64(add):
+    case INDEX_op_add_i32:
         /* For 3-operand addition, use LEA.  */
         if (args[0] != args[1]) {
             TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
@@ -1535,78 +1259,78 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             } else if (a0 == a2) {
                 /* Watch out for dest = src + dest, since we've removed
                    the matching constraint on the add.  */
-                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
+                tgen_arithr(s, ARITH_ADD, a0, a1);
                 break;
             }
 
-            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
+            tcg_out_modrm_sib_offset(s, OPC_LEA, a0, a1, a2, 0, c3);
             break;
         }
         c = ARITH_ADD;
         goto gen_arith;
-    OP_32_64(sub):
+    case INDEX_op_sub_i32:
         c = ARITH_SUB;
         goto gen_arith;
-    OP_32_64(and):
+    case INDEX_op_and_i32:
         c = ARITH_AND;
         goto gen_arith;
-    OP_32_64(or):
+    case INDEX_op_or_i32:
         c = ARITH_OR;
         goto gen_arith;
-    OP_32_64(xor):
+    case INDEX_op_xor_i32:
         c = ARITH_XOR;
         goto gen_arith;
     gen_arith:
         if (const_args[2]) {
-            tgen_arithi(s, c + rexw, args[0], args[2], 0);
+            tgen_arithi(s, c, args[0], args[2], 0);
         } else {
-            tgen_arithr(s, c + rexw, args[0], args[2]);
+            tgen_arithr(s, c, args[0], args[2]);
         }
         break;
 
-    OP_32_64(mul):
+    case INDEX_op_mul_i32:
         if (const_args[2]) {
             int32_t val;
             val = args[2];
             if (val == (int8_t)val) {
-                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
+                tcg_out_modrm(s, OPC_IMUL_GvEvIb, args[0], args[0]);
                 tcg_out8(s, val);
             } else {
-                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
+                tcg_out_modrm(s, OPC_IMUL_GvEvIz, args[0], args[0]);
                 tcg_out32(s, val);
             }
         } else {
-            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
+            tcg_out_modrm(s, OPC_IMUL_GvEv, args[0], args[2]);
         }
         break;
 
-    OP_32_64(div2):
-        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
+    case INDEX_op_div2_i32:
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_IDIV, args[4]);
         break;
-    OP_32_64(divu2):
-        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
+    case INDEX_op_divu2_i32:
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_DIV, args[4]);
         break;
 
-    OP_32_64(shl):
+    case INDEX_op_shl_i32:
         c = SHIFT_SHL;
         goto gen_shift;
-    OP_32_64(shr):
+    case INDEX_op_shr_i32:
         c = SHIFT_SHR;
         goto gen_shift;
-    OP_32_64(sar):
+    case INDEX_op_sar_i32:
         c = SHIFT_SAR;
         goto gen_shift;
-    OP_32_64(rotl):
+    case INDEX_op_rotl_i32:
         c = SHIFT_ROL;
         goto gen_shift;
-    OP_32_64(rotr):
+    case INDEX_op_rotr_i32:
         c = SHIFT_ROR;
         goto gen_shift;
     gen_shift:
         if (const_args[2]) {
-            tcg_out_shifti(s, c + rexw, args[0], args[2]);
+            tcg_out_shifti(s, c, args[0], args[2]);
         } else {
-            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
+            tcg_out_modrm(s, OPC_SHIFT_cl, c, args[0]);
         }
         break;
 
@@ -1619,30 +1343,30 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                           args[2], const_args[2]);
         break;
 
-    OP_32_64(bswap16):
+    case INDEX_op_bswap16_i32:
         tcg_out_rolw_8(s, args[0]);
         break;
-    OP_32_64(bswap32):
+    case INDEX_op_bswap32_i32:
         tcg_out_bswap32(s, args[0]);
         break;
 
-    OP_32_64(neg):
-        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
+    case INDEX_op_neg_i32:
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NEG, args[0]);
         break;
-    OP_32_64(not):
-        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
+    case INDEX_op_not_i32:
+        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_NOT, args[0]);
         break;
 
-    OP_32_64(ext8s):
-        tcg_out_ext8s(s, args[0], args[1], rexw);
+    case INDEX_op_ext8s_i32:
+        tcg_out_ext8s(s, args[0], args[1]);
         break;
-    OP_32_64(ext16s):
-        tcg_out_ext16s(s, args[0], args[1], rexw);
+    case INDEX_op_ext16s_i32:
+        tcg_out_ext16s(s, args[0], args[1]);
         break;
-    OP_32_64(ext8u):
+    case INDEX_op_ext8u_i32:
         tcg_out_ext8u(s, args[0], args[1]);
         break;
-    OP_32_64(ext16u):
+    case INDEX_op_ext16u_i32:
         tcg_out_ext16u(s, args[0], args[1]);
         break;
 
@@ -1658,9 +1382,6 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_ld16s:
         tcg_out_qemu_ld(s, args, 1 | 4);
         break;
-#if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_qemu_ld32u:
-#endif
     case INDEX_op_qemu_ld32:
         tcg_out_qemu_ld(s, args, 2);
         break;
@@ -1681,7 +1402,6 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_qemu_st(s, args, 3);
         break;
 
-#if TCG_TARGET_REG_BITS == 32
     case INDEX_op_brcond2_i32:
         tcg_out_brcond2(s, args, const_args, 0);
         break;
@@ -1715,47 +1435,11 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
             tgen_arithr(s, ARITH_SBB, args[1], args[5]);
         }
         break;
-#else /* TCG_TARGET_REG_BITS == 64 */
-    case INDEX_op_movi_i64:
-        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
-        break;
-    case INDEX_op_ld32s_i64:
-        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_ld_i64:
-        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_st_i64:
-        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
-        break;
-    case INDEX_op_qemu_ld32s:
-        tcg_out_qemu_ld(s, args, 2 | 4);
-        break;
 
-    case INDEX_op_brcond_i64:
-        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
-                         args[3], 0);
-        break;
-    case INDEX_op_setcond_i64:
-        tcg_out_setcond64(s, args[3], args[0], args[1],
-                          args[2], const_args[2]);
-        break;
-
-    case INDEX_op_bswap64_i64:
-        tcg_out_bswap64(s, args[0]);
-        break;
-    case INDEX_op_ext32u_i64:
-        tcg_out_ext32u(s, args[0], args[1]);
-        break;
-    case INDEX_op_ext32s_i64:
-        tcg_out_ext32s(s, args[0], args[1]);
-        break;
-#endif
-
-    OP_32_64(deposit):
+    case INDEX_op_deposit_i32:
         if (args[3] == 0 && args[4] == 8) {
             /* load bits 0..7 */
-            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
+            tcg_out_modrm(s, OPC_MOVB_EvGv,
                           args[2], args[0]);
         } else if (args[3] == 8 && args[4] == 8) {
             /* load bits 8..15 */
@@ -1825,76 +1509,13 @@  static const TCGTargetOpDef x86_op_defs[] = {
 
     { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
 
-#if TCG_TARGET_REG_BITS == 32
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
     { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
     { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
     { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
     { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
-#else
-    { INDEX_op_mov_i64, { "r", "r" } },
-    { INDEX_op_movi_i64, { "r" } },
-    { INDEX_op_ld8u_i64, { "r", "r" } },
-    { INDEX_op_ld8s_i64, { "r", "r" } },
-    { INDEX_op_ld16u_i64, { "r", "r" } },
-    { INDEX_op_ld16s_i64, { "r", "r" } },
-    { INDEX_op_ld32u_i64, { "r", "r" } },
-    { INDEX_op_ld32s_i64, { "r", "r" } },
-    { INDEX_op_ld_i64, { "r", "r" } },
-    { INDEX_op_st8_i64, { "r", "r" } },
-    { INDEX_op_st16_i64, { "r", "r" } },
-    { INDEX_op_st32_i64, { "r", "r" } },
-    { INDEX_op_st_i64, { "r", "r" } },
-
-    { INDEX_op_add_i64, { "r", "0", "re" } },
-    { INDEX_op_mul_i64, { "r", "0", "re" } },
-    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
-    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
-    { INDEX_op_sub_i64, { "r", "0", "re" } },
-    { INDEX_op_and_i64, { "r", "0", "reZ" } },
-    { INDEX_op_or_i64, { "r", "0", "re" } },
-    { INDEX_op_xor_i64, { "r", "0", "re" } },
-
-    { INDEX_op_shl_i64, { "r", "0", "ci" } },
-    { INDEX_op_shr_i64, { "r", "0", "ci" } },
-    { INDEX_op_sar_i64, { "r", "0", "ci" } },
-    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
-    { INDEX_op_rotr_i64, { "r", "0", "ci" } },
-
-    { INDEX_op_brcond_i64, { "r", "re" } },
-    { INDEX_op_setcond_i64, { "r", "r", "re" } },
-
-    { INDEX_op_bswap16_i64, { "r", "0" } },
-    { INDEX_op_bswap32_i64, { "r", "0" } },
-    { INDEX_op_bswap64_i64, { "r", "0" } },
-    { INDEX_op_neg_i64, { "r", "0" } },
-    { INDEX_op_not_i64, { "r", "0" } },
-
-    { INDEX_op_ext8s_i64, { "r", "r" } },
-    { INDEX_op_ext16s_i64, { "r", "r" } },
-    { INDEX_op_ext32s_i64, { "r", "r" } },
-    { INDEX_op_ext8u_i64, { "r", "r" } },
-    { INDEX_op_ext16u_i64, { "r", "r" } },
-    { INDEX_op_ext32u_i64, { "r", "r" } },
-
-    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
-#endif
 
-#if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L" } },
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
-    { INDEX_op_qemu_ld32s, { "r", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "L" } },
-
-    { INDEX_op_qemu_st8, { "L", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L" } },
-#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L" } },
@@ -1923,19 +1544,10 @@  static const TCGTargetOpDef x86_op_defs[] = {
 };
 
 static int tcg_target_callee_save_regs[] = {
-#if TCG_TARGET_REG_BITS == 64
-    TCG_REG_RBP,
-    TCG_REG_RBX,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    TCG_REG_R14, /* Currently used for the global env. */
-    TCG_REG_R15,
-#else
     TCG_REG_EBP, /* Currently used for the global env. */
     TCG_REG_EBX,
     TCG_REG_ESI,
     TCG_REG_EDI,
-#endif
 };
 
 /* Generate global QEMU prologue and epilogue code */
@@ -1988,25 +1600,12 @@  static void tcg_target_init(TCGContext *s)
         tcg_abort();
 #endif
 
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
-        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
-    } else {
-        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
-    }
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
 
     tcg_regset_clear(tcg_target_call_clobber_regs);
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
     tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
-    }
 
     tcg_regset_clear(s->reserved_regs);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index adbb036..0fc2a20 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -25,11 +25,7 @@ 
 
 //#define TCG_TARGET_WORDS_BIGENDIAN
 
-#if TCG_TARGET_REG_BITS == 64
-# define TCG_TARGET_NB_REGS 16
-#else
-# define TCG_TARGET_NB_REGS 8
-#endif
+#define TCG_TARGET_NB_REGS 8
 
 typedef enum {
     TCG_REG_EAX = 0,
@@ -40,25 +36,6 @@  typedef enum {
     TCG_REG_EBP,
     TCG_REG_ESI,
     TCG_REG_EDI,
-
-    /* 64-bit registers; always define the symbols to avoid
-       too much if-deffing.  */
-    TCG_REG_R8,
-    TCG_REG_R9,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    TCG_REG_R14,
-    TCG_REG_R15,
-    TCG_REG_RAX = TCG_REG_EAX,
-    TCG_REG_RCX = TCG_REG_ECX,
-    TCG_REG_RDX = TCG_REG_EDX,
-    TCG_REG_RBX = TCG_REG_EBX,
-    TCG_REG_RSP = TCG_REG_ESP,
-    TCG_REG_RBP = TCG_REG_EBP,
-    TCG_REG_RSI = TCG_REG_ESI,
-    TCG_REG_RDI = TCG_REG_EDI,
 } TCGReg;
 
 #define TCG_CT_CONST_S32 0x100
@@ -87,27 +64,6 @@  typedef enum {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
 
-#if TCG_TARGET_REG_BITS == 64
-#define TCG_TARGET_HAS_div2_i64         1
-#define TCG_TARGET_HAS_rot_i64          1
-#define TCG_TARGET_HAS_ext8s_i64        1
-#define TCG_TARGET_HAS_ext16s_i64       1
-#define TCG_TARGET_HAS_ext32s_i64       1
-#define TCG_TARGET_HAS_ext8u_i64        1
-#define TCG_TARGET_HAS_ext16u_i64       1
-#define TCG_TARGET_HAS_ext32u_i64       1
-#define TCG_TARGET_HAS_bswap16_i64      1
-#define TCG_TARGET_HAS_bswap32_i64      1
-#define TCG_TARGET_HAS_bswap64_i64      1
-#define TCG_TARGET_HAS_neg_i64          1
-#define TCG_TARGET_HAS_not_i64          1
-#define TCG_TARGET_HAS_andc_i64         0
-#define TCG_TARGET_HAS_orc_i64          0
-#define TCG_TARGET_HAS_eqv_i64          0
-#define TCG_TARGET_HAS_nand_i64         0
-#define TCG_TARGET_HAS_nor_i64          0
-#define TCG_TARGET_HAS_deposit_i64      1
-#endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
     (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
@@ -117,11 +73,7 @@  typedef enum {
 #define TCG_TARGET_HAS_GUEST_BASE
 
 /* Note: must be synced with dyngen-exec.h */
-#if TCG_TARGET_REG_BITS == 64
-# define TCG_AREG0 TCG_REG_R14
-#else
-# define TCG_AREG0 TCG_REG_EBP
-#endif
+#define TCG_AREG0 TCG_REG_EBP
 
 static inline void flush_icache_range(unsigned long start, unsigned long stop)
 {
diff --git a/tcg/x86_64/tcg-target.c b/tcg/x86_64/tcg-target.c
index dc81572..840623a 100644
--- a/tcg/x86_64/tcg-target.c
+++ b/tcg/x86_64/tcg-target.c
@@ -24,17 +24,12 @@ 
 
 #ifndef NDEBUG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-#if TCG_TARGET_REG_BITS == 64
     "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
     "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
-#else
-    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
-#endif
 };
 #endif
 
 static const int tcg_target_reg_alloc_order[] = {
-#if TCG_TARGET_REG_BITS == 64
     TCG_REG_RBP,
     TCG_REG_RBX,
     TCG_REG_R12,
@@ -50,37 +45,19 @@  static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_RSI,
     TCG_REG_RDI,
     TCG_REG_RAX,
-#else
-    TCG_REG_EBX,
-    TCG_REG_ESI,
-    TCG_REG_EDI,
-    TCG_REG_EBP,
-    TCG_REG_ECX,
-    TCG_REG_EDX,
-    TCG_REG_EAX,
-#endif
 };
 
 static const int tcg_target_call_iarg_regs[] = {
-#if TCG_TARGET_REG_BITS == 64
     TCG_REG_RDI,
     TCG_REG_RSI,
     TCG_REG_RDX,
     TCG_REG_RCX,
     TCG_REG_R8,
     TCG_REG_R9,
-#else
-    TCG_REG_EAX,
-    TCG_REG_EDX,
-    TCG_REG_ECX
-#endif
 };
 
 static const int tcg_target_call_oarg_regs[] = {
-    TCG_REG_EAX,
-#if TCG_TARGET_REG_BITS == 32
-    TCG_REG_EDX
-#endif
+    TCG_REG_RAX,
 };
 
 static uint8_t *tb_ret_addr;
@@ -112,21 +89,7 @@  static void patch_reloc(uint8_t *code_ptr, int type,
 /* maximum number of register used for input function arguments */
 static inline int tcg_target_get_call_iarg_regs_count(int flags)
 {
-    if (TCG_TARGET_REG_BITS == 64) {
-        return 6;
-    }
-
-    flags &= TCG_CALL_TYPE_MASK;
-    switch(flags) {
-    case TCG_CALL_TYPE_STD:
-        return 0;
-    case TCG_CALL_TYPE_REGPARM_1:
-    case TCG_CALL_TYPE_REGPARM_2:
-    case TCG_CALL_TYPE_REGPARM:
-        return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
-    default:
-        tcg_abort();
-    }
+    return 6;
 }
 
 /* parse target specific constraints */
@@ -138,27 +101,27 @@  static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     switch(ct_str[0]) {
     case 'a':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_RAX);
         break;
     case 'b':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_RBX);
         break;
     case 'c':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_RCX);
         break;
     case 'd':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_RDX);
         break;
     case 'S':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_RSI);
         break;
     case 'D':
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
+        tcg_regset_set_reg(ct->u.regs, TCG_REG_RDI);
         break;
     case 'q':
         ct->ct |= TCG_CT_REG;
@@ -184,15 +147,9 @@  static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         /* qemu_ld/st address constraint */
     case 'L':
         ct->ct |= TCG_CT_REG;
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_regset_set32(ct->u.regs, 0, 0xffff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
-        } else {
-            tcg_regset_set32(ct->u.regs, 0, 0xff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
-        }
+        tcg_regset_set32(ct->u.regs, 0, 0xffff);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
         break;
 
     case 'e':
@@ -227,25 +184,14 @@  static inline int tcg_target_const_match(tcg_target_long val,
     return 0;
 }
 
-#if TCG_TARGET_REG_BITS == 64
-# define LOWREGMASK(x)	((x) & 7)
-#else
-# define LOWREGMASK(x)	(x)
-#endif
+#define LOWREGMASK(x)   ((x) & 7)
 
 #define P_EXT		0x100		/* 0x0f opcode prefix */
 #define P_DATA16	0x200		/* 0x66 opcode prefix */
-#if TCG_TARGET_REG_BITS == 64
-# define P_ADDR32	0x400		/* 0x67 opcode prefix */
-# define P_REXW		0x800		/* Set REX.W = 1 */
-# define P_REXB_R	0x1000		/* REG field as byte register */
-# define P_REXB_RM	0x2000		/* R/M field as byte register */
-#else
-# define P_ADDR32	0
-# define P_REXW		0
-# define P_REXB_R	0
-# define P_REXB_RM	0
-#endif
+#define P_ADDR32        0x400           /* 0x67 opcode prefix */
+#define P_REXW          0x800           /* Set REX.W = 1 */
+#define P_REXB_R        0x1000          /* REG field as byte register */
+#define P_REXB_RM       0x2000          /* R/M field as byte register */
 
 #define OPC_ARITH_EvIz	(0x81)
 #define OPC_ARITH_EvIb	(0x83)
@@ -353,7 +299,6 @@  static const uint8_t tcg_cond_to_jcc[10] = {
     [TCG_COND_GTU] = JCC_JA,
 };
 
-#if TCG_TARGET_REG_BITS == 64
 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
 {
     int rex;
@@ -390,22 +335,6 @@  static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
     }
     tcg_out8(s, opc);
 }
-#else
-static void tcg_out_opc(TCGContext *s, int opc)
-{
-    if (opc & P_DATA16) {
-        tcg_out8(s, 0x66);
-    }
-    if (opc & P_EXT) {
-        tcg_out8(s, 0x0f);
-    }
-    tcg_out8(s, opc);
-}
-/* Discard the register arguments to tcg_out_opc early, so as not to penalize
-   the 32-bit compilation paths.  This method works with all versions of gcc,
-   whereas relying on optimization may not be able to exclude them.  */
-#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
-#endif
 
 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
 {
@@ -425,45 +354,38 @@  static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
     int mod, len;
 
     if (index < 0 && rm < 0) {
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* Try for a rip-relative addressing mode.  This has replaced
-               the 32-bit-mode absolute addressing encoding.  */
-            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
-            tcg_target_long disp = offset - pc;
-            if (disp == (int32_t)disp) {
-                tcg_out_opc(s, opc, r, 0, 0);
-                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
-                tcg_out32(s, disp);
-                return;
-            }
+        /* Try for a rip-relative addressing mode.  This has replaced
+           the 32-bit-mode absolute addressing encoding.  */
+        tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
+        tcg_target_long disp = offset - pc;
 
-            /* Try for an absolute address encoding.  This requires the
-               use of the MODRM+SIB encoding and is therefore larger than
-               rip-relative addressing.  */
-            if (offset == (int32_t)offset) {
-                tcg_out_opc(s, opc, r, 0, 0);
-                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
-                tcg_out8(s, (4 << 3) | 5);
-                tcg_out32(s, offset);
-                return;
-            }
+        if (disp == (int32_t)disp) {
+            tcg_out_opc(s, opc, r, 0, 0);
+            tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
+            tcg_out32(s, disp);
+            return;
+        }
 
-            /* ??? The memory isn't directly addressable.  */
-            tcg_abort();
-        } else {
-            /* Absolute address.  */
+        /* Try for an absolute address encoding.  This requires the
+           use of the MODRM+SIB encoding and is therefore larger than
+           rip-relative addressing.  */
+        if (offset == (int32_t)offset) {
             tcg_out_opc(s, opc, r, 0, 0);
-            tcg_out8(s, (r << 3) | 5);
+            tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
+            tcg_out8(s, (4 << 3) | 5);
             tcg_out32(s, offset);
             return;
         }
+
+        /* ??? The memory isn't directly addressable.  */
+        tcg_abort();
     }
 
     /* Find the length of the immediate addend.  Note that the encoding
        that would be used for (%ebp) indicates absolute addressing.  */
     if (rm < 0) {
         mod = 0, len = 4, rm = 5;
-    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
+    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_RBP) {
         mod = 0, len = 0;
     } else if (offset == (int8_t)offset) {
         mod = 0x40, len = 1;
@@ -473,7 +395,7 @@  static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
 
     /* Use a single byte MODRM format if possible.  Note that the encoding
        that would be used for %esp is the escape to the two byte form.  */
-    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
+    if (index < 0 && LOWREGMASK(rm) != TCG_REG_RSP) {
         /* Single byte MODRM format.  */
         tcg_out_opc(s, opc, r, rm, 0);
         tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
@@ -486,7 +408,7 @@  static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
         if (index < 0) {
             index = 4;
         } else {
-            assert(index != TCG_REG_ESP);
+            assert(index != TCG_REG_RSP);
         }
 
         tcg_out_opc(s, opc, r, rm, index);
@@ -610,14 +532,12 @@  static inline void tcg_out_rolw_8(TCGContext *s, int reg)
 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
 {
     /* movzbl */
-    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
     tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
 }
 
 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
 {
     /* movsbl */
-    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
     tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
 }
 
@@ -654,39 +574,31 @@  static void tgen_arithi(TCGContext *s, int c, int r0,
 {
     int rexw = 0;
 
-    if (TCG_TARGET_REG_BITS == 64) {
-        rexw = c & -8;
-        c &= 7;
-    }
+    rexw = c & -8;
+    c &= 7;
 
     /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
        partial flags update stalls on Pentium4 and are not recommended
        by current Intel optimization manuals.  */
     if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
         int is_inc = (c == ARITH_ADD) ^ (val < 0);
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* The single-byte increment encodings are re-tasked as the
-               REX prefixes.  Use the MODRM encoding.  */
-            tcg_out_modrm(s, OPC_GRP5 + rexw,
-                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
-        } else {
-            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
-        }
+        /* The single-byte increment encodings are re-tasked as the
+           REX prefixes.  Use the MODRM encoding.  */
+        tcg_out_modrm(s, OPC_GRP5 + rexw,
+                      (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
         return;
     }
 
     if (c == ARITH_AND) {
-        if (TCG_TARGET_REG_BITS == 64) {
-            if (val == 0xffffffffu) {
-                tcg_out_ext32u(s, r0, r0);
-                return;
-            }
-            if (val == (uint32_t)val) {
-                /* AND with no high bits set can use a 32-bit operation.  */
-                rexw = 0;
-            }
+        if (val == 0xffffffffu) {
+            tcg_out_ext32u(s, r0, r0);
+            return;
+        }
+        if (val == (uint32_t)val) {
+            /* AND with no high bits set can use a 32-bit operation.  */
+            rexw = 0;
         }
-        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
+        if (val == 0xffu) {
             tcg_out_ext8u(s, r0, r0);
             return;
         }
@@ -787,7 +699,6 @@  static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
     tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
 }
 
-#if TCG_TARGET_REG_BITS == 64
 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                              TCGArg arg1, TCGArg arg2, int const_arg2,
                              int label_index, int small)
@@ -795,89 +706,6 @@  static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
     tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
     tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
 }
-#else
-/* XXX: we implement it at the target level to avoid having to
-   handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
-                            const int *const_args, int small)
-{
-    int label_next;
-    label_next = gen_new_label();
-    switch(args[4]) {
-    case TCG_COND_EQ:
-        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
-                         label_next, 1);
-        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
-                         args[5], small);
-        break;
-    case TCG_COND_NE:
-        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
-                         args[5], small);
-        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
-                         args[5], small);
-        break;
-    case TCG_COND_LT:
-        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
-                         args[5], small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
-                         args[5], small);
-        break;
-    case TCG_COND_LE:
-        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
-                         args[5], small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
-                         args[5], small);
-        break;
-    case TCG_COND_GT:
-        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
-                         args[5], small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
-                         args[5], small);
-        break;
-    case TCG_COND_GE:
-        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
-                         args[5], small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
-                         args[5], small);
-        break;
-    case TCG_COND_LTU:
-        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
-                         args[5], small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
-                         args[5], small);
-        break;
-    case TCG_COND_LEU:
-        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
-                         args[5], small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
-                         args[5], small);
-        break;
-    case TCG_COND_GTU:
-        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
-                         args[5], small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
-                         args[5], small);
-        break;
-    case TCG_COND_GEU:
-        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
-                         args[5], small);
-        tcg_out_jxx(s, JCC_JNE, label_next, 1);
-        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
-                         args[5], small);
-        break;
-    default:
-        tcg_abort();
-    }
-    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
-}
-#endif
 
 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                               TCGArg arg1, TCGArg arg2, int const_arg2)
@@ -887,7 +715,6 @@  static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
     tcg_out_ext8u(s, dest, dest);
 }
 
-#if TCG_TARGET_REG_BITS == 64
 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                               TCGArg arg1, TCGArg arg2, int const_arg2)
 {
@@ -895,49 +722,6 @@  static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
     tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
     tcg_out_ext8u(s, dest, dest);
 }
-#else
-static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
-                             const int *const_args)
-{
-    TCGArg new_args[6];
-    int label_true, label_over;
-
-    memcpy(new_args, args+1, 5*sizeof(TCGArg));
-
-    if (args[0] == args[1] || args[0] == args[2]
-        || (!const_args[3] && args[0] == args[3])
-        || (!const_args[4] && args[0] == args[4])) {
-        /* When the destination overlaps with one of the argument
-           registers, don't do anything tricky.  */
-        label_true = gen_new_label();
-        label_over = gen_new_label();
-
-        new_args[5] = label_true;
-        tcg_out_brcond2(s, new_args, const_args+1, 1);
-
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
-        tcg_out_jxx(s, JCC_JMP, label_over, 1);
-        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
-
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
-        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
-    } else {
-        /* When the destination does not overlap one of the arguments,
-           clear the destination first, jump if cond false, and emit an
-           increment in the true case.  This results in smaller code.  */
-
-        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
-
-        label_over = gen_new_label();
-        new_args[4] = tcg_invert_cond(new_args[4]);
-        new_args[5] = label_over;
-        tcg_out_brcond2(s, new_args, const_args+1, 1);
-
-        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
-        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
-    }
-}
-#endif
 
 static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
 {
@@ -1014,7 +798,7 @@  static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
     TCGType type = TCG_TYPE_I32;
     int rexw = 0;
 
-    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
+    if (TARGET_LONG_BITS == 64) {
         type = TCG_TYPE_I64;
         rexw = P_REXW;
     }
@@ -1044,16 +828,6 @@  static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
     label_ptr[0] = s->code_ptr;
     s->code_ptr++;
 
-    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        /* cmp 4(r1), addrhi */
-        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
-
-        /* jne label1 */
-        tcg_out8(s, OPC_JCC_short + JCC_JNE);
-        label_ptr[1] = s->code_ptr;
-        s->code_ptr++;
-    }
-
     /* TLB Hit.  */
 
     /* add addend(r1), r0 */
@@ -1098,7 +872,6 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
             tcg_out_bswap32(s, datalo);
         }
         break;
-#if TCG_TARGET_REG_BITS == 64
     case 2 | 4:
         if (bswap) {
             tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
@@ -1108,30 +881,10 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
             tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
         }
         break;
-#endif
     case 3:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
-            if (bswap) {
-                tcg_out_bswap64(s, datalo);
-            }
-        } else {
-            if (bswap) {
-                int t = datalo;
-                datalo = datahi;
-                datahi = t;
-            }
-            if (base != datalo) {
-                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
-                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
-            } else {
-                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
-                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
-            }
-            if (bswap) {
-                tcg_out_bswap32(s, datalo);
-                tcg_out_bswap32(s, datahi);
-            }
+        tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
+        if (bswap) {
+            tcg_out_bswap64(s, datalo);
         }
         break;
     default:
@@ -1160,7 +913,7 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     }
 
 #if defined(CONFIG_SOFTMMU)
-    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
+    mem_index = args[addrlo_idx + 1];
     s_bits = opc & 3;
 
     tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
@@ -1186,46 +939,31 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     /* XXX: move that code at the end of the TB */
     /* The first argument is already loaded with addrlo.  */
     arg_idx = 1;
-    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
-                    args[addrlo_idx + 1]);
-    }
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                  mem_index);
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
 
     switch(opc) {
     case 0 | 4:
-        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
+        tcg_out_ext8s(s, data_reg, TCG_REG_RAX, P_REXW);
         break;
     case 1 | 4:
-        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
+        tcg_out_ext16s(s, data_reg, TCG_REG_RAX, P_REXW);
         break;
     case 0:
-        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
+        tcg_out_ext8u(s, data_reg, TCG_REG_RAX);
         break;
     case 1:
-        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
+        tcg_out_ext16u(s, data_reg, TCG_REG_RAX);
         break;
     case 2:
-        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_RAX);
         break;
-#if TCG_TARGET_REG_BITS == 64
     case 2 | 4:
-        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
+        tcg_out_ext32s(s, data_reg, TCG_REG_RAX);
         break;
-#endif
     case 3:
-        if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
-        } else if (data_reg == TCG_REG_EDX) {
-            /* xchg %edx, %eax */
-            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
-            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
-        } else {
-            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
-            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
-        }
+        tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
         break;
     default:
         tcg_abort();
@@ -1238,18 +976,16 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         int32_t offset = GUEST_BASE;
         int base = args[addrlo_idx];
 
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* ??? We assume all operations have left us with register
-               contents that are zero extended.  So far this appears to
-               be true.  If we want to enforce this, we can either do
-               an explicit zero-extension here, or (if GUEST_BASE == 0)
-               use the ADDR32 prefix.  For now, do nothing.  */
-
-            if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
-                base = TCG_REG_RDI, offset = 0;
-            }
+        /* ??? We assume all operations have left us with register
+           contents that are zero extended.  So far this appears to
+           be true.  If we want to enforce this, we can either do
+           an explicit zero-extension here, or (if GUEST_BASE == 0)
+           use the ADDR32 prefix.  For now, do nothing.  */
+
+        if (offset != GUEST_BASE) {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
+            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
+            base = TCG_REG_RDI, offset = 0;
         }
 
         tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
@@ -1292,24 +1028,12 @@  static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
         tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
         break;
     case 3:
-        if (TCG_TARGET_REG_BITS == 64) {
-            if (bswap) {
-                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
-                tcg_out_bswap64(s, scratch);
-                datalo = scratch;
-            }
-            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
-        } else if (bswap) {
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
-            tcg_out_bswap32(s, scratch);
-            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
-            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
-            tcg_out_bswap32(s, scratch);
-            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
-        } else {
-            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
-            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+        if (bswap) {
+            tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
+            tcg_out_bswap64(s, scratch);
+            datalo = scratch;
         }
+        tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
         break;
     default:
         tcg_abort();
@@ -1329,13 +1053,9 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
 
     data_reg = args[0];
     addrlo_idx = 1;
-    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
-        data_reg2 = args[1];
-        addrlo_idx = 2;
-    }
 
 #if defined(CONFIG_SOFTMMU)
-    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
+    mem_index = args[addrlo_idx + 1];
     s_bits = opc;
 
     tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
@@ -1359,51 +1079,16 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }
 
     /* XXX: move that code at the end of the TB */
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                    TCG_REG_RSI, data_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
-        stack_adjust = 0;
-    } else if (TARGET_LONG_BITS == 32) {
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
-        if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
-            tcg_out_pushi(s, mem_index);
-            stack_adjust = 4;
-        } else {
-            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
-            stack_adjust = 0;
-        }
-    } else {
-        if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
-            tcg_out_pushi(s, mem_index);
-            tcg_out_push(s, data_reg2);
-            tcg_out_push(s, data_reg);
-            stack_adjust = 12;
-        } else {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
-            switch(opc) {
-            case 0:
-                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
-                break;
-            case 1:
-                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
-                break;
-            case 2:
-                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
-                break;
-            }
-            tcg_out_pushi(s, mem_index);
-            stack_adjust = 4;
-        }
-    }
+    tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+                TCG_REG_RSI, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
+    stack_adjust = 0;
 
     tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
 
     if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
         /* Pop and discard.  This is 2 bytes smaller than the add.  */
-        tcg_out_pop(s, TCG_REG_ECX);
+        tcg_out_pop(s, TCG_REG_RCX);
     } else if (stack_adjust != 0) {
         tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
     }
@@ -1415,18 +1100,16 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
         int32_t offset = GUEST_BASE;
         int base = args[addrlo_idx];
 
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* ??? We assume all operations have left us with register
-               contents that are zero extended.  So far this appears to
-               be true.  If we want to enforce this, we can either do
-               an explicit zero-extension here, or (if GUEST_BASE == 0)
-               use the ADDR32 prefix.  For now, do nothing.  */
-
-            if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
-                base = TCG_REG_RDI, offset = 0;
-            }
+        /* ??? We assume all operations have left us with register
+           contents that are zero extended.  So far this appears to
+           be true.  If we want to enforce this, we can either do
+           an explicit zero-extension here, or (if GUEST_BASE == 0)
+           use the ADDR32 prefix.  For now, do nothing.  */
+
+        if (offset != GUEST_BASE) {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
+            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
+            base = TCG_REG_RDI, offset = 0;
         }
 
         tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
@@ -1439,19 +1122,14 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 {
     int c, rexw = 0;
 
-#if TCG_TARGET_REG_BITS == 64
 # define OP_32_64(x) \
         case glue(glue(INDEX_op_, x), _i64): \
             rexw = P_REXW; /* FALLTHRU */    \
         case glue(glue(INDEX_op_, x), _i32)
-#else
-# define OP_32_64(x) \
-        case glue(glue(INDEX_op_, x), _i32)
-#endif
 
     switch(opc) {
     case INDEX_op_exit_tb:
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, args[0]);
         tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
@@ -1503,9 +1181,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     OP_32_64(ld16s):
         tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
         break;
-#if TCG_TARGET_REG_BITS == 64
     case INDEX_op_ld32u_i64:
-#endif
     case INDEX_op_ld_i32:
         tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
         break;
@@ -1518,9 +1194,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                              args[0], args[1], args[2]);
         break;
-#if TCG_TARGET_REG_BITS == 64
     case INDEX_op_st32_i64:
-#endif
     case INDEX_op_st_i32:
         tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
         break;
@@ -1658,9 +1332,7 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_qemu_ld16s:
         tcg_out_qemu_ld(s, args, 1 | 4);
         break;
-#if TCG_TARGET_REG_BITS == 64
     case INDEX_op_qemu_ld32u:
-#endif
     case INDEX_op_qemu_ld32:
         tcg_out_qemu_ld(s, args, 2);
         break;
@@ -1681,41 +1353,6 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_qemu_st(s, args, 3);
         break;
 
-#if TCG_TARGET_REG_BITS == 32
-    case INDEX_op_brcond2_i32:
-        tcg_out_brcond2(s, args, const_args, 0);
-        break;
-    case INDEX_op_setcond2_i32:
-        tcg_out_setcond2(s, args, const_args);
-        break;
-    case INDEX_op_mulu2_i32:
-        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
-        break;
-    case INDEX_op_add2_i32:
-        if (const_args[4]) {
-            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
-        } else {
-            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
-        }
-        if (const_args[5]) {
-            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
-        } else {
-            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
-        }
-        break;
-    case INDEX_op_sub2_i32:
-        if (const_args[4]) {
-            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
-        } else {
-            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
-        }
-        if (const_args[5]) {
-            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
-        } else {
-            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
-        }
-        break;
-#else /* TCG_TARGET_REG_BITS == 64 */
     case INDEX_op_movi_i64:
         tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
         break;
@@ -1750,7 +1387,6 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_ext32s_i64:
         tcg_out_ext32s(s, args[0], args[1]);
         break;
-#endif
 
     OP_32_64(deposit):
         if (args[3] == 0 && args[4] == 8) {
@@ -1825,13 +1461,6 @@  static const TCGTargetOpDef x86_op_defs[] = {
 
     { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
 
-#if TCG_TARGET_REG_BITS == 32
-    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
-    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
-    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
-    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
-    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
-#else
     { INDEX_op_mov_i64, { "r", "r" } },
     { INDEX_op_movi_i64, { "r" } },
     { INDEX_op_ld8u_i64, { "r", "r" } },
@@ -1878,9 +1507,7 @@  static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_ext32u_i64, { "r", "r" } },
 
     { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
-#endif
 
-#if TCG_TARGET_REG_BITS == 64
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
     { INDEX_op_qemu_ld16u, { "r", "L" } },
@@ -1894,48 +1521,16 @@  static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_qemu_st16, { "L", "L" } },
     { INDEX_op_qemu_st32, { "L", "L" } },
     { INDEX_op_qemu_st64, { "L", "L" } },
-#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "r", "L" } },
-
-    { INDEX_op_qemu_st8, { "cb", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L", "L" } },
-#else
-    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
-    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
-
-    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
-#endif
     { -1 },
 };
 
 static int tcg_target_callee_save_regs[] = {
-#if TCG_TARGET_REG_BITS == 64
     TCG_REG_RBP,
     TCG_REG_RBX,
     TCG_REG_R12,
     TCG_REG_R13,
     TCG_REG_R14, /* Currently used for the global env. */
     TCG_REG_R15,
-#else
-    TCG_REG_EBP, /* Currently used for the global env. */
-    TCG_REG_EBX,
-    TCG_REG_ESI,
-    TCG_REG_EDI,
-#endif
 };
 
 /* Generate global QEMU prologue and epilogue code */
@@ -1962,7 +1557,7 @@  static void tcg_target_qemu_prologue(TCGContext *s)
         tcg_out_push(s, tcg_target_callee_save_regs[i]);
     }
 
-    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+    tcg_out_addi(s, TCG_REG_RSP, -stack_addend);
 
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
 
@@ -1988,25 +1583,19 @@  static void tcg_target_init(TCGContext *s)
         tcg_abort();
 #endif
 
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
-        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
-    } else {
-        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
-    }
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
 
     tcg_regset_clear(tcg_target_call_clobber_regs);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
-    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
-        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
-    }
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RAX);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDX);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RCX);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
 
     tcg_regset_clear(s->reserved_regs);
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
diff --git a/tcg/x86_64/tcg-target.h b/tcg/x86_64/tcg-target.h
index ecfea9e..44436d0 100644
--- a/tcg/x86_64/tcg-target.h
+++ b/tcg/x86_64/tcg-target.h
@@ -25,24 +25,17 @@ 
 
 //#define TCG_TARGET_WORDS_BIGENDIAN
 
-#if TCG_TARGET_REG_BITS == 64
-# define TCG_TARGET_NB_REGS 16
-#else
-# define TCG_TARGET_NB_REGS 8
-#endif
+#define TCG_TARGET_NB_REGS 16
 
 typedef enum {
-    TCG_REG_EAX = 0,
-    TCG_REG_ECX,
-    TCG_REG_EDX,
-    TCG_REG_EBX,
-    TCG_REG_ESP,
-    TCG_REG_EBP,
-    TCG_REG_ESI,
-    TCG_REG_EDI,
-
-    /* 64-bit registers; always define the symbols to avoid
-       too much if-deffing.  */
+    TCG_REG_RAX = 0,
+    TCG_REG_RCX,
+    TCG_REG_RDX,
+    TCG_REG_RBX,
+    TCG_REG_RSP,
+    TCG_REG_RBP,
+    TCG_REG_RSI,
+    TCG_REG_RDI,
     TCG_REG_R8,
     TCG_REG_R9,
     TCG_REG_R10,
@@ -51,21 +44,13 @@  typedef enum {
     TCG_REG_R13,
     TCG_REG_R14,
     TCG_REG_R15,
-    TCG_REG_RAX = TCG_REG_EAX,
-    TCG_REG_RCX = TCG_REG_ECX,
-    TCG_REG_RDX = TCG_REG_EDX,
-    TCG_REG_RBX = TCG_REG_EBX,
-    TCG_REG_RSP = TCG_REG_ESP,
-    TCG_REG_RBP = TCG_REG_EBP,
-    TCG_REG_RSI = TCG_REG_ESI,
-    TCG_REG_RDI = TCG_REG_EDI,
 } TCGReg;
 
 #define TCG_CT_CONST_S32 0x100
 #define TCG_CT_CONST_U32 0x200
 
 /* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_ESP
+#define TCG_REG_CALL_STACK TCG_REG_RSP
 #define TCG_TARGET_STACK_ALIGN 16
 #define TCG_TARGET_CALL_STACK_OFFSET 0
 
@@ -87,7 +72,6 @@  typedef enum {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
 
-#if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_ext8s_i64        1
@@ -107,7 +91,6 @@  typedef enum {
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
-#endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
     (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
@@ -117,11 +100,7 @@  typedef enum {
 #define TCG_TARGET_HAS_GUEST_BASE
 
 /* Note: must be synced with dyngen-exec.h */
-#if TCG_TARGET_REG_BITS == 64
-# define TCG_AREG0 TCG_REG_R14
-#else
-# define TCG_AREG0 TCG_REG_EBP
-#endif
+#define TCG_AREG0 TCG_REG_R14
 
 static inline void flush_icache_range(unsigned long start, unsigned long stop)
 {
-- 
1.7.2.5