@@ -240,6 +240,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
# define P_REXB_R 0
# define P_REXB_RM 0
#endif
+#define P_FS 0x4000
+#define P_GS 0x8000
#define OPC_ARITH_EvIz (0x81)
#define OPC_ARITH_EvIb (0x83)
@@ -347,11 +349,29 @@ static const uint8_t tcg_cond_to_jcc[10] = {
[TCG_COND_GTU] = JCC_JA,
};
+static inline void tcg_out_seg_prefix(TCGContext *s, int opc)
+{
+ switch (opc & (P_FS | P_GS)) {
+ case 0:
+ break;
+ case P_FS:
+ tcg_out8(s, 0x64);
+ break;
+ case P_GS:
+ tcg_out8(s, 0x65);
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
int rex;
+ tcg_out_seg_prefix(s, opc);
+
if (opc & P_DATA16) {
/* We should never be asking for both 16 and 64-bit operation. */
assert((opc & P_REXW) == 0);
@@ -387,6 +407,8 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
+ tcg_out_seg_prefix(s, opc);
+
if (opc & P_DATA16) {
tcg_out8(s, 0x66);
}
@@ -956,6 +978,48 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
tcg_out_branch(s, 0, dest);
}
+#ifndef GUEST_BASE
+#define GUEST_BASE 0
+#endif
+
+#if defined(__x86_64__) && defined(__linux__)
+# include <sys/syscall.h>
+# include <asm/prctl.h>
+
+static int guest_base_flags;
+static inline void setup_guest_base_seg(void)
+{
+ if (syscall(__NR_arch_prctl, ARCH_SET_GS, GUEST_BASE) == 0) {
+ guest_base_flags = P_GS;
+ }
+}
+#elif defined(__i386__) && defined(__linux__)
+# include <sys/syscall.h>
+# include <asm/ldt.h>
+
+static int guest_base_flags;
+static inline void setup_guest_base_seg(void)
+{
+ struct user_desc d;
+
+ memset(&d, 0, sizeof(d));
+ d.entry_number = -1; /* let the kernel choose */
+ d.base_addr = GUEST_BASE;
+ d.limit = 0xfffff; /* 4GB segment */
+ d.seg_32bit = 1;
+ d.limit_in_pages = 1;
+ d.useable = 1;
+
+ if (syscall(__NR_set_thread_area, &d) == 0) {
+ asm volatile("movw %w0, %%fs" : : "r"(d.entry_number * 8 + 3));
+ guest_base_flags = P_FS;
+ }
+}
+#else
+# define guest_base_flags 0
+static inline void setup_guest_base_seg(void) { }
+#endif
+
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
@@ -1056,37 +1120,41 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
#endif
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
- int base, tcg_target_long ofs, int sizeop)
+ int base, tcg_target_long ofs, int sizeop,
+ int prefix)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 1;
#else
const int bswap = 0;
#endif
+ int rexw = (TARGET_LONG_BITS == 64 ? P_REXW : 0);
+
switch (sizeop) {
case 0:
- tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZBL + prefix, datalo, base, ofs);
break;
case 0 | 4:
- tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSBL + prefix + rexw, datalo, base, ofs);
break;
case 1:
- tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZWL + prefix, datalo, base, ofs);
if (bswap) {
tcg_out_rolw_8(s, datalo);
}
break;
case 1 | 4:
if (bswap) {
- tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZWL + prefix, datalo, base, ofs);
tcg_out_rolw_8(s, datalo);
- tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
+ tcg_out_modrm(s, OPC_MOVSWL + rexw, datalo, datalo);
} else {
- tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSWL + prefix + rexw,
+ datalo, base, ofs);
}
break;
case 2:
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + prefix, datalo, base, ofs);
if (bswap) {
tcg_out_bswap32(s, datalo);
}
@@ -1094,17 +1162,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
#if TCG_TARGET_REG_BITS == 64
case 2 | 4:
if (bswap) {
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + prefix, datalo, base, ofs);
tcg_out_bswap32(s, datalo);
tcg_out_ext32s(s, datalo, datalo);
} else {
- tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSLQ + prefix, datalo, base, ofs);
}
break;
#endif
case 3:
if (TCG_TARGET_REG_BITS == 64) {
- tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + prefix + P_REXW,
+ datalo, base, ofs);
if (bswap) {
tcg_out_bswap64(s, datalo);
}
@@ -1115,11 +1184,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
datahi = t;
}
if (base != datalo) {
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
- tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + prefix,
+ datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + prefix,
+ datahi, base, ofs + 4);
} else {
- tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + prefix,
+ datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + prefix,
+ datalo, base, ofs);
}
if (bswap) {
tcg_out_bswap32(s, datalo);
@@ -1135,8 +1208,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
EAX. It will be useful once fixed registers globals are less
common. */
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
- int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
int data_reg, data_reg2 = 0;
int addrlo_idx;
@@ -1161,7 +1233,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
/* TLB Hit. */
tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
- tcg_target_call_iarg_regs[0], 0, opc);
+ tcg_target_call_iarg_regs[0], 0, opc, 0);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1230,28 +1302,32 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
{
int32_t offset = GUEST_BASE;
int base = args[addrlo_idx];
-
- if (TCG_TARGET_REG_BITS == 64) {
- /* ??? We assume all operations have left us with register
- contents that are zero extended. So far this appears to
- be true. If we want to enforce this, we can either do
- an explicit zero-extension here, or (if GUEST_BASE == 0)
- use the ADDR32 prefix. For now, do nothing. */
-
- if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
- base = TCG_REG_RDI, offset = 0;
- }
+ int prefix = 0;
+
+ /* ??? For 64-bit, we assume all operations have left us with register
+ contents that are zero extended. So far this appears to be true.
+ If we want to enforce this, we can either do an explicit zero
+ extension here, or (if GUEST_BASE == 0 or guest_base_flags) use
+ the ADDR32 prefix. For now, do nothing. */
+
+ if (GUEST_BASE && guest_base_flags) {
+ prefix = guest_base_flags;
+ offset = 0;
+ } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
+ base = TCG_REG_RDI, offset = 0;
}
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base,
+ offset, opc, prefix);
}
#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
- int base, tcg_target_long ofs, int sizeop)
+ int base, tcg_target_long ofs, int sizeop,
+ int prefix)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 1;
@@ -1266,7 +1342,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
switch (sizeop) {
case 0:
- tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + prefix,
+ datalo, base, ofs);
break;
case 1:
if (bswap) {
@@ -1274,7 +1351,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_rolw_8(s, scratch);
datalo = scratch;
}
- tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + prefix,
+ datalo, base, ofs);
break;
case 2:
if (bswap) {
@@ -1282,7 +1360,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_bswap32(s, scratch);
datalo = scratch;
}
- tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + prefix, datalo, base, ofs);
break;
case 3:
if (TCG_TARGET_REG_BITS == 64) {
@@ -1291,17 +1369,22 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_bswap64(s, scratch);
datalo = scratch;
}
- tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + prefix,
+ datalo, base, ofs);
} else if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
tcg_out_bswap32(s, scratch);
- tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + prefix,
+ scratch, base, ofs);
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_bswap32(s, scratch);
- tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + prefix,
+ scratch, base, ofs + 4);
} else {
- tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
- tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + prefix,
+ datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + prefix,
+ datahi, base, ofs + 4);
}
break;
default:
@@ -1336,7 +1419,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
/* TLB Hit. */
tcg_out_qemu_st_direct(s, data_reg, data_reg2,
- tcg_target_call_iarg_regs[0], 0, opc);
+ tcg_target_call_iarg_regs[0], 0, opc, 0);
/* jmp label2 */
tcg_out8(s, OPC_JMP_short);
@@ -1407,22 +1490,25 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
{
int32_t offset = GUEST_BASE;
int base = args[addrlo_idx];
-
- if (TCG_TARGET_REG_BITS == 64) {
- /* ??? We assume all operations have left us with register
- contents that are zero extended. So far this appears to
- be true. If we want to enforce this, we can either do
- an explicit zero-extension here, or (if GUEST_BASE == 0)
- use the ADDR32 prefix. For now, do nothing. */
-
- if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
- base = TCG_REG_RDI, offset = 0;
- }
+ int prefix = 0;
+
+ /* ??? For 64-bit, we assume all operations have left us with register
+ contents that are zero extended. So far this appears to be true.
+ If we want to enforce this, we can either do an explicit zero
+ extension here, or (if GUEST_BASE == 0 or guest_base_flags) use
+ the ADDR32 prefix. For now, do nothing. */
+
+ if (GUEST_BASE && guest_base_flags) {
+ prefix = guest_base_flags;
+ offset = 0;
+ } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
+ base = TCG_REG_RDI, offset = 0;
}
- tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, base,
+ offset, opc, prefix);
}
#endif
}
@@ -1945,6 +2031,14 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_pop(s, tcg_target_callee_save_regs[i]);
}
tcg_out_opc(s, OPC_RET, 0, 0, 0);
+
+ /* Try to set up %fs or %gs (whichever isn't already used for TLS)
+ to point to GUEST_BASE. The 1-byte segment override prefix is
+ always smaller than the 4-byte offset we'd have to encode into
+ the address, and is also able to handle the full 64-bit offset. */
+ if (GUEST_BASE) {
+ setup_guest_base_seg();
+ }
}
static void tcg_target_init(TCGContext *s)
For 32-bit, using a segment override is smaller than the 4-byte immediate offset. For 64-bit, segments can hold the entire 64-bit offset whereas the 4-byte immediate cannot. Only implemented for Linux, with fallback to the immediate offset if the system call fails. Signed-off-by: Richard Henderson <rth@twiddle.net> --- tcg/i386/tcg-target.c | 206 +++++++++++++++++++++++++++++++++++------------- 1 files changed, 150 insertions(+), 56 deletions(-)