Patchwork [06/25] x86: split off FPU helpers

login
register
mail settings
Submitter Blue Swirl
Date June 3, 2012, 2:30 p.m.
Message ID <CAAu8pHuphGAX19RF9ds9P+ZNEoL5QrfM15DUO-iysf+8pQ8QWw@mail.gmail.com>
Download mbox | patch
Permalink /patch/162539/
State New
Headers show

Comments

Blue Swirl - June 3, 2012, 2:30 p.m.
Move FPU and MMX/SSE helpers to fpu_helpers.c.

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 Makefile.target          |    3 +-
 target-i386/fpu_helper.c | 1304 ++++++++++++++++++++++++++++++++++++
 target-i386/op_helper.c  | 1671 ++++++----------------------------------------
 3 files changed, 1503 insertions(+), 1475 deletions(-)
 create mode 100644 target-i386/fpu_helper.c

 #undef SHIFT

Patch

diff --git a/Makefile.target b/Makefile.target
index eefaedf..a9551d7 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -91,7 +91,8 @@  ifneq ($(TARGET_BASE_ARCH), ppc)
 libobj-y += cpu.o
 endif
 ifeq ($(TARGET_BASE_ARCH), i386)
-libobj-y += excp_helper.o
+libobj-y += excp_helper.o fpu_helper.o
+fpu_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
 endif
 libobj-$(TARGET_SPARC64) += vis_helper.o
 libobj-$(CONFIG_NEED_MMU) += mmu.o
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
new file mode 100644
index 0000000..6065c2e
--- /dev/null
+++ b/target-i386/fpu_helper.c
@@ -0,0 +1,1304 @@ 
+/*
+ *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+#include "cpu.h"
+#include "dyngen-exec.h"
+#include "helper.h"
+
+#if !defined(CONFIG_USER_ONLY)
+#include "softmmu_exec.h"
+#endif /* !defined(CONFIG_USER_ONLY) */
+
+#define FPU_RC_MASK         0xc00
+#define FPU_RC_NEAR         0x000
+#define FPU_RC_DOWN         0x400
+#define FPU_RC_UP           0x800
+#define FPU_RC_CHOP         0xc00
+
+#define MAXTAN 9223372036854775808.0
+
+/* the following deal with x86 long double-precision numbers */
+#define MAXEXPD 0x7fff
+#define EXPBIAS 16383
+#define EXPD(fp)        (fp.l.upper & 0x7fff)
+#define SIGND(fp)       ((fp.l.upper) & 0x8000)
+#define MANTD(fp)       (fp.l.lower)
+#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
+
+#define FPUS_IE (1 << 0)
+#define FPUS_DE (1 << 1)
+#define FPUS_ZE (1 << 2)
+#define FPUS_OE (1 << 3)
+#define FPUS_UE (1 << 4)
+#define FPUS_PE (1 << 5)
+#define FPUS_SF (1 << 6)
+#define FPUS_SE (1 << 7)
+#define FPUS_B  (1 << 15)
+
+#define FPUC_EM 0x3f
+
+#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
+#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
+#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
+
+static inline void fpush(void)
+{
+    env->fpstt = (env->fpstt - 1) & 7;
+    env->fptags[env->fpstt] = 0; /* validate stack entry */
+}
+
+static inline void fpop(void)
+{
+    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
+    env->fpstt = (env->fpstt + 1) & 7;
+}
+
+static inline floatx80 helper_fldt(target_ulong ptr)
+{
+    CPU_LDoubleU temp;
+
+    temp.l.lower = ldq(ptr);
+    temp.l.upper = lduw(ptr + 8);
+    return temp.d;
+}
+
+static inline void helper_fstt(floatx80 f, target_ulong ptr)
+{
+    CPU_LDoubleU temp;
+
+    temp.d = f;
+    stq(ptr, temp.l.lower);
+    stw(ptr + 8, temp.l.upper);
+}
+
+/* x87 FPU helpers */
+
+static inline double floatx80_to_double(floatx80 a)
+{
+    union {
+        float64 f64;
+        double d;
+    } u;
+
+    u.f64 = floatx80_to_float64(a, &env->fp_status);
+    return u.d;
+}
+
+static inline floatx80 double_to_floatx80(double a)
+{
+    union {
+        float64 f64;
+        double d;
+    } u;
+
+    u.d = a;
+    return float64_to_floatx80(u.f64, &env->fp_status);
+}
+
+static void fpu_set_exception(int mask)
+{
+    env->fpus |= mask;
+    if (env->fpus & (~env->fpuc & FPUC_EM)) {
+        env->fpus |= FPUS_SE | FPUS_B;
+    }
+}
+
+static inline floatx80 helper_fdiv(floatx80 a, floatx80 b)
+{
+    if (floatx80_is_zero(b)) {
+        fpu_set_exception(FPUS_ZE);
+    }
+    return floatx80_div(a, b, &env->fp_status);
+}
+
+static void fpu_raise_exception(void)
+{
+    if (env->cr[0] & CR0_NE_MASK) {
+        raise_exception(env, EXCP10_COPR);
+    }
+#if !defined(CONFIG_USER_ONLY)
+    else {
+        cpu_set_ferr(env);
+    }
+#endif
+}
+
+void helper_flds_FT0(uint32_t val)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+
+    u.i = val;
+    FT0 = float32_to_floatx80(u.f, &env->fp_status);
+}
+
+void helper_fldl_FT0(uint64_t val)
+{
+    union {
+        float64 f;
+        uint64_t i;
+    } u;
+
+    u.i = val;
+    FT0 = float64_to_floatx80(u.f, &env->fp_status);
+}
+
+void helper_fildl_FT0(int32_t val)
+{
+    FT0 = int32_to_floatx80(val, &env->fp_status);
+}
+
+void helper_flds_ST0(uint32_t val)
+{
+    int new_fpstt;
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+
+    new_fpstt = (env->fpstt - 1) & 7;
+    u.i = val;
+    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fldl_ST0(uint64_t val)
+{
+    int new_fpstt;
+    union {
+        float64 f;
+        uint64_t i;
+    } u;
+
+    new_fpstt = (env->fpstt - 1) & 7;
+    u.i = val;
+    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fildl_ST0(int32_t val)
+{
+    int new_fpstt;
+
+    new_fpstt = (env->fpstt - 1) & 7;
+    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fildll_ST0(int64_t val)
+{
+    int new_fpstt;
+
+    new_fpstt = (env->fpstt - 1) & 7;
+    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+uint32_t helper_fsts_ST0(void)
+{
+    union {
+        float32 f;
+        uint32_t i;
+    } u;
+
+    u.f = floatx80_to_float32(ST0, &env->fp_status);
+    return u.i;
+}
+
+uint64_t helper_fstl_ST0(void)
+{
+    union {
+        float64 f;
+        uint64_t i;
+    } u;
+
+    u.f = floatx80_to_float64(ST0, &env->fp_status);
+    return u.i;
+}
+
+int32_t helper_fist_ST0(void)
+{
+    int32_t val;
+
+    val = floatx80_to_int32(ST0, &env->fp_status);
+    if (val != (int16_t)val) {
+        val = -32768;
+    }
+    return val;
+}
+
+int32_t helper_fistl_ST0(void)
+{
+    int32_t val;
+
+    val = floatx80_to_int32(ST0, &env->fp_status);
+    return val;
+}
+
+int64_t helper_fistll_ST0(void)
+{
+    int64_t val;
+
+    val = floatx80_to_int64(ST0, &env->fp_status);
+    return val;
+}
+
+int32_t helper_fistt_ST0(void)
+{
+    int32_t val;
+
+    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
+    if (val != (int16_t)val) {
+        val = -32768;
+    }
+    return val;
+}
+
+int32_t helper_fisttl_ST0(void)
+{
+    int32_t val;
+
+    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
+    return val;
+}
+
+int64_t helper_fisttll_ST0(void)
+{
+    int64_t val;
+
+    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
+    return val;
+}
+
+void helper_fldt_ST0(target_ulong ptr)
+{
+    int new_fpstt;
+
+    new_fpstt = (env->fpstt - 1) & 7;
+    env->fpregs[new_fpstt].d = helper_fldt(ptr);
+    env->fpstt = new_fpstt;
+    env->fptags[new_fpstt] = 0; /* validate stack entry */
+}
+
+void helper_fstt_ST0(target_ulong ptr)
+{
+    helper_fstt(ST0, ptr);
+}
+
+void helper_fpush(void)
+{
+    fpush();
+}
+
+void helper_fpop(void)
+{
+    fpop();
+}
+
+void helper_fdecstp(void)
+{
+    env->fpstt = (env->fpstt - 1) & 7;
+    env->fpus &= ~0x4700;
+}
+
+void helper_fincstp(void)
+{
+    env->fpstt = (env->fpstt + 1) & 7;
+    env->fpus &= ~0x4700;
+}
+
+/* FPU move */
+
+void helper_ffree_STN(int st_index)
+{
+    env->fptags[(env->fpstt + st_index) & 7] = 1;
+}
+
+void helper_fmov_ST0_FT0(void)
+{
+    ST0 = FT0;
+}
+
+void helper_fmov_FT0_STN(int st_index)
+{
+    FT0 = ST(st_index);
+}
+
+void helper_fmov_ST0_STN(int st_index)
+{
+    ST0 = ST(st_index);
+}
+
+void helper_fmov_STN_ST0(int st_index)
+{
+    ST(st_index) = ST0;
+}
+
+void helper_fxchg_ST0_STN(int st_index)
+{
+    floatx80 tmp;
+
+    tmp = ST(st_index);
+    ST(st_index) = ST0;
+    ST0 = tmp;
+}
+
+/* FPU operations */
+
+static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
+
+void helper_fcom_ST0_FT0(void)
+{
+    int ret;
+
+    ret = floatx80_compare(ST0, FT0, &env->fp_status);
+    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
+}
+
+void helper_fucom_ST0_FT0(void)
+{
+    int ret;
+
+    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
+    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
+}
+
+static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
+
+void helper_fcomi_ST0_FT0(void)
+{
+    int eflags;
+    int ret;
+
+    ret = floatx80_compare(ST0, FT0, &env->fp_status);
+    eflags = helper_cc_compute_all(CC_OP);
+    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
+    CC_SRC = eflags;
+}
+
+void helper_fucomi_ST0_FT0(void)
+{
+    int eflags;
+    int ret;
+
+    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
+    eflags = helper_cc_compute_all(CC_OP);
+    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
+    CC_SRC = eflags;
+}
+
+void helper_fadd_ST0_FT0(void)
+{
+    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
+}
+
+void helper_fmul_ST0_FT0(void)
+{
+    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
+}
+
+void helper_fsub_ST0_FT0(void)
+{
+    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
+}
+
+void helper_fsubr_ST0_FT0(void)
+{
+    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
+}
+
+void helper_fdiv_ST0_FT0(void)
+{
+    ST0 = helper_fdiv(ST0, FT0);
+}
+
+void helper_fdivr_ST0_FT0(void)
+{
+    ST0 = helper_fdiv(FT0, ST0);
+}
+
+/* fp operations between STN and ST0 */
+
+void helper_fadd_STN_ST0(int st_index)
+{
+    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
+}
+
+void helper_fmul_STN_ST0(int st_index)
+{
+    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
+}
+
+void helper_fsub_STN_ST0(int st_index)
+{
+    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
+}
+
+void helper_fsubr_STN_ST0(int st_index)
+{
+    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
+}
+
+void helper_fdiv_STN_ST0(int st_index)
+{
+    floatx80 *p;
+
+    p = &ST(st_index);
+    *p = helper_fdiv(*p, ST0);
+}
+
+void helper_fdivr_STN_ST0(int st_index)
+{
+    floatx80 *p;
+
+    p = &ST(st_index);
+    *p = helper_fdiv(ST0, *p);
+}
+
+/* misc FPU operations */
+void helper_fchs_ST0(void)
+{
+    ST0 = floatx80_chs(ST0);
+}
+
+void helper_fabs_ST0(void)
+{
+    ST0 = floatx80_abs(ST0);
+}
+
+void helper_fld1_ST0(void)
+{
+    ST0 = floatx80_one;
+}
+
+void helper_fldl2t_ST0(void)
+{
+    ST0 = floatx80_l2t;
+}
+
+void helper_fldl2e_ST0(void)
+{
+    ST0 = floatx80_l2e;
+}
+
+void helper_fldpi_ST0(void)
+{
+    ST0 = floatx80_pi;
+}
+
+void helper_fldlg2_ST0(void)
+{
+    ST0 = floatx80_lg2;
+}
+
+void helper_fldln2_ST0(void)
+{
+    ST0 = floatx80_ln2;
+}
+
+void helper_fldz_ST0(void)
+{
+    ST0 = floatx80_zero;
+}
+
+void helper_fldz_FT0(void)
+{
+    FT0 = floatx80_zero;
+}
+
+uint32_t helper_fnstsw(void)
+{
+    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+}
+
+uint32_t helper_fnstcw(void)
+{
+    return env->fpuc;
+}
+
+static void update_fp_status(void)
+{
+    int rnd_type;
+
+    /* set rounding mode */
+    switch (env->fpuc & FPU_RC_MASK) {
+    default:
+    case FPU_RC_NEAR:
+        rnd_type = float_round_nearest_even;
+        break;
+    case FPU_RC_DOWN:
+        rnd_type = float_round_down;
+        break;
+    case FPU_RC_UP:
+        rnd_type = float_round_up;
+        break;
+    case FPU_RC_CHOP:
+        rnd_type = float_round_to_zero;
+        break;
+    }
+    set_float_rounding_mode(rnd_type, &env->fp_status);
+    switch ((env->fpuc >> 8) & 3) {
+    case 0:
+        rnd_type = 32;
+        break;
+    case 2:
+        rnd_type = 64;
+        break;
+    case 3:
+    default:
+        rnd_type = 80;
+        break;
+    }
+    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
+}
+
+void helper_fldcw(uint32_t val)
+{
+    env->fpuc = val;
+    update_fp_status();
+}
+
+void helper_fclex(void)
+{
+    env->fpus &= 0x7f00;
+}
+
+void helper_fwait(void)
+{
+    if (env->fpus & FPUS_SE) {
+        fpu_raise_exception();
+    }
+}
+
+void helper_fninit(void)
+{
+    env->fpus = 0;
+    env->fpstt = 0;
+    env->fpuc = 0x37f;
+    env->fptags[0] = 1;
+    env->fptags[1] = 1;
+    env->fptags[2] = 1;
+    env->fptags[3] = 1;
+    env->fptags[4] = 1;
+    env->fptags[5] = 1;
+    env->fptags[6] = 1;
+    env->fptags[7] = 1;
+}
+
+/* BCD ops */
+
+void helper_fbld_ST0(target_ulong ptr)
+{
+    floatx80 tmp;
+    uint64_t val;
+    unsigned int v;
+    int i;
+
+    val = 0;
+    for (i = 8; i >= 0; i--) {
+        v = ldub(ptr + i);
+        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
+    }
+    tmp = int64_to_floatx80(val, &env->fp_status);
+    if (ldub(ptr + 9) & 0x80) {
+        floatx80_chs(tmp);
+    }
+    fpush();
+    ST0 = tmp;
+}
+
+void helper_fbst_ST0(target_ulong ptr)
+{
+    int v;
+    target_ulong mem_ref, mem_end;
+    int64_t val;
+
+    val = floatx80_to_int64(ST0, &env->fp_status);
+    mem_ref = ptr;
+    mem_end = mem_ref + 9;
+    if (val < 0) {
+        stb(mem_end, 0x80);
+        val = -val;
+    } else {
+        stb(mem_end, 0x00);
+    }
+    while (mem_ref < mem_end) {
+        if (val == 0) {
+            break;
+        }
+        v = val % 100;
+        val = val / 100;
+        v = ((v / 10) << 4) | (v % 10);
+        stb(mem_ref++, v);
+    }
+    while (mem_ref < mem_end) {
+        stb(mem_ref++, 0);
+    }
+}
+
+void helper_f2xm1(void)
+{
+    double val = floatx80_to_double(ST0);
+
+    val = pow(2.0, val) - 1.0;
+    ST0 = double_to_floatx80(val);
+}
+
+void helper_fyl2x(void)
+{
+    double fptemp = floatx80_to_double(ST0);
+
+    if (fptemp > 0.0) {
+        fptemp = log(fptemp) / log(2.0); /* log2(ST) */
+        fptemp *= floatx80_to_double(ST1);
+        ST1 = double_to_floatx80(fptemp);
+        fpop();
+    } else {
+        env->fpus &= ~0x4700;
+        env->fpus |= 0x400;
+    }
+}
+
+void helper_fptan(void)
+{
+    double fptemp = floatx80_to_double(ST0);
+
+    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        fptemp = tan(fptemp);
+        ST0 = double_to_floatx80(fptemp);
+        fpush();
+        ST0 = floatx80_one;
+        env->fpus &= ~0x400; /* C2 <-- 0 */
+        /* the above code is for |arg| < 2**52 only */
+    }
+}
+
+void helper_fpatan(void)
+{
+    double fptemp, fpsrcop;
+
+    fpsrcop = floatx80_to_double(ST1);
+    fptemp = floatx80_to_double(ST0);
+    ST1 = double_to_floatx80(atan2(fpsrcop, fptemp));
+    fpop();
+}
+
+void helper_fxtract(void)
+{
+    CPU_LDoubleU temp;
+
+    temp.d = ST0;
+
+    if (floatx80_is_zero(ST0)) {
+        /* Easy way to generate -inf and raising division by 0 exception */
+        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
+                           &env->fp_status);
+        fpush();
+        ST0 = temp.d;
+    } else {
+        int expdif;
+
+        expdif = EXPD(temp) - EXPBIAS;
+        /* DP exponent bias */
+        ST0 = int32_to_floatx80(expdif, &env->fp_status);
+        fpush();
+        BIASEXPONENT(temp);
+        ST0 = temp.d;
+    }
+}
+
+void helper_fprem1(void)
+{
+    double st0, st1, dblq, fpsrcop, fptemp;
+    CPU_LDoubleU fpsrcop1, fptemp1;
+    int expdif;
+    signed long long int q;
+
+    st0 = floatx80_to_double(ST0);
+    st1 = floatx80_to_double(ST1);
+
+    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
+        ST0 = double_to_floatx80(0.0 / 0.0); /* NaN */
+        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
+        return;
+    }
+
+    fpsrcop = st0;
+    fptemp = st1;
+    fpsrcop1.d = ST0;
+    fptemp1.d = ST1;
+    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+
+    if (expdif < 0) {
+        /* optimisation? taken from the AMD docs */
+        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
+        /* ST0 is unchanged */
+        return;
+    }
+
+    if (expdif < 53) {
+        dblq = fpsrcop / fptemp;
+        /* round dblq towards nearest integer */
+        dblq = rint(dblq);
+        st0 = fpsrcop - fptemp * dblq;
+
+        /* convert dblq to q by truncating towards zero */
+        if (dblq < 0.0) {
+            q = (signed long long int)(-dblq);
+        } else {
+            q = (signed long long int)dblq;
+        }
+
+        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
+        /* (C0,C3,C1) <-- (q2,q1,q0) */
+        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
+        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
+    } else {
+        env->fpus |= 0x400;  /* C2 <-- 1 */
+        fptemp = pow(2.0, expdif - 50);
+        fpsrcop = (st0 / st1) / fptemp;
+        /* fpsrcop = integer obtained by chopping */
+        fpsrcop = (fpsrcop < 0.0) ?
+                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+        st0 -= (st1 * fpsrcop * fptemp);
+    }
+    ST0 = double_to_floatx80(st0);
+}
+
+void helper_fprem(void)
+{
+    double st0, st1, dblq, fpsrcop, fptemp;
+    CPU_LDoubleU fpsrcop1, fptemp1;
+    int expdif;
+    signed long long int q;
+
+    st0 = floatx80_to_double(ST0);
+    st1 = floatx80_to_double(ST1);
+
+    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
+        ST0 = double_to_floatx80(0.0 / 0.0); /* NaN */
+        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
+        return;
+    }
+
+    fpsrcop = st0;
+    fptemp = st1;
+    fpsrcop1.d = ST0;
+    fptemp1.d = ST1;
+    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
+
+    if (expdif < 0) {
+        /* optimisation? taken from the AMD docs */
+        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
+        /* ST0 is unchanged */
+        return;
+    }
+
+    if (expdif < 53) {
+        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
+        /* round dblq towards zero */
+        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
+        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
+
+        /* convert dblq to q by truncating towards zero */
+        if (dblq < 0.0) {
+            q = (signed long long int)(-dblq);
+        } else {
+            q = (signed long long int)dblq;
+        }
+
+        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
+        /* (C0,C3,C1) <-- (q2,q1,q0) */
+        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
+        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
+        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
+    } else {
+        int N = 32 + (expdif % 32); /* as per AMD docs */
+
+        env->fpus |= 0x400;  /* C2 <-- 1 */
+        fptemp = pow(2.0, (double)(expdif - N));
+        fpsrcop = (st0 / st1) / fptemp;
+        /* fpsrcop = integer obtained by chopping */
+        fpsrcop = (fpsrcop < 0.0) ?
+                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
+        st0 -= (st1 * fpsrcop * fptemp);
+    }
+    ST0 = double_to_floatx80(st0);
+}
+
+void helper_fyl2xp1(void)
+{
+    double fptemp = floatx80_to_double(ST0);
+
+    if ((fptemp + 1.0) > 0.0) {
+        fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
+        fptemp *= floatx80_to_double(ST1);
+        ST1 = double_to_floatx80(fptemp);
+        fpop();
+    } else {
+        env->fpus &= ~0x4700;
+        env->fpus |= 0x400;
+    }
+}
+
+void helper_fsqrt(void)
+{
+    if (floatx80_is_neg(ST0)) {
+        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
+        env->fpus |= 0x400;
+    }
+    ST0 = floatx80_sqrt(ST0, &env->fp_status);
+}
+
+void helper_fsincos(void)
+{
+    double fptemp = floatx80_to_double(ST0);
+
+    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = double_to_floatx80(sin(fptemp));
+        fpush();
+        ST0 = double_to_floatx80(cos(fptemp));
+        env->fpus &= ~0x400;  /* C2 <-- 0 */
+        /* the above code is for |arg| < 2**63 only */
+    }
+}
+
+void helper_frndint(void)
+{
+    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
+}
+
+void helper_fscale(void)
+{
+    if (floatx80_is_any_nan(ST1)) {
+        ST0 = ST1;
+    } else {
+        int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
+        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
+    }
+}
+
+void helper_fsin(void)
+{
+    double fptemp = floatx80_to_double(ST0);
+
+    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = double_to_floatx80(sin(fptemp));
+        env->fpus &= ~0x400;  /* C2 <-- 0 */
+        /* the above code is for |arg| < 2**53 only */
+    }
+}
+
+void helper_fcos(void)
+{
+    double fptemp = floatx80_to_double(ST0);
+
+    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
+        env->fpus |= 0x400;
+    } else {
+        ST0 = double_to_floatx80(cos(fptemp));
+        env->fpus &= ~0x400;  /* C2 <-- 0 */
+        /* the above code is for |arg| < 2**63 only */
+    }
+}
+
+void helper_fxam_ST0(void)
+{
+    CPU_LDoubleU temp;
+    int expdif;
+
+    temp.d = ST0;
+
+    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
+    if (SIGND(temp)) {
+        env->fpus |= 0x200; /* C1 <-- 1 */
+    }
+
+    /* XXX: test fptags too */
+    expdif = EXPD(temp);
+    if (expdif == MAXEXPD) {
+        if (MANTD(temp) == 0x8000000000000000ULL) {
+            env->fpus |= 0x500; /* Infinity */
+        } else {
+            env->fpus |= 0x100; /* NaN */
+        }
+    } else if (expdif == 0) {
+        if (MANTD(temp) == 0) {
+            env->fpus |=  0x4000; /* Zero */
+        } else {
+            env->fpus |= 0x4400; /* Denormal */
+        }
+    } else {
+        env->fpus |= 0x400;
+    }
+}
+
+void helper_fstenv(target_ulong ptr, int data32)
+{
+    int fpus, fptag, exp, i;
+    uint64_t mant;
+    CPU_LDoubleU tmp;
+
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+    fptag = 0;
+    for (i = 7; i >= 0; i--) {
+        fptag <<= 2;
+        if (env->fptags[i]) {
+            fptag |= 3;
+        } else {
+            tmp.d = env->fpregs[i].d;
+            exp = EXPD(tmp);
+            mant = MANTD(tmp);
+            if (exp == 0 && mant == 0) {
+                /* zero */
+                fptag |= 1;
+            } else if (exp == 0 || exp == MAXEXPD
+                       || (mant & (1LL << 63)) == 0) {
+                /* NaNs, infinity, denormal */
+                fptag |= 2;
+            }
+        }
+    }
+    if (data32) {
+        /* 32 bit */
+        stl(ptr, env->fpuc);
+        stl(ptr + 4, fpus);
+        stl(ptr + 8, fptag);
+        stl(ptr + 12, 0); /* fpip */
+        stl(ptr + 16, 0); /* fpcs */
+        stl(ptr + 20, 0); /* fpoo */
+        stl(ptr + 24, 0); /* fpos */
+    } else {
+        /* 16 bit */
+        stw(ptr, env->fpuc);
+        stw(ptr + 2, fpus);
+        stw(ptr + 4, fptag);
+        stw(ptr + 6, 0);
+        stw(ptr + 8, 0);
+        stw(ptr + 10, 0);
+        stw(ptr + 12, 0);
+    }
+}
+
+void helper_fldenv(target_ulong ptr, int data32)
+{
+    int i, fpus, fptag;
+
+    if (data32) {
+        env->fpuc = lduw(ptr);
+        fpus = lduw(ptr + 4);
+        fptag = lduw(ptr + 8);
+    } else {
+        env->fpuc = lduw(ptr);
+        fpus = lduw(ptr + 2);
+        fptag = lduw(ptr + 4);
+    }
+    env->fpstt = (fpus >> 11) & 7;
+    env->fpus = fpus & ~0x3800;
+    for (i = 0; i < 8; i++) {
+        env->fptags[i] = ((fptag & 3) == 3);
+        fptag >>= 2;
+    }
+}
+
+void helper_fsave(target_ulong ptr, int data32)
+{
+    floatx80 tmp;
+    int i;
+
+    helper_fstenv(ptr, data32);
+
+    ptr += (14 << data32);
+    for (i = 0; i < 8; i++) {
+        tmp = ST(i);
+        helper_fstt(tmp, ptr);
+        ptr += 10;
+    }
+
+    /* fninit */
+    env->fpus = 0;
+    env->fpstt = 0;
+    env->fpuc = 0x37f;
+    env->fptags[0] = 1;
+    env->fptags[1] = 1;
+    env->fptags[2] = 1;
+    env->fptags[3] = 1;
+    env->fptags[4] = 1;
+    env->fptags[5] = 1;
+    env->fptags[6] = 1;
+    env->fptags[7] = 1;
+}
+
+void helper_frstor(target_ulong ptr, int data32)
+{
+    floatx80 tmp;
+    int i;
+
+    helper_fldenv(ptr, data32);
+    ptr += (14 << data32);
+
+    for (i = 0; i < 8; i++) {
+        tmp = helper_fldt(ptr);
+        ST(i) = tmp;
+        ptr += 10;
+    }
+}
+
+#if defined(CONFIG_USER_ONLY)
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32)
+{
+    CPUX86State *saved_env;
+
+    saved_env = env;
+    env = s;
+
+    helper_fsave(ptr, data32);
+
+    env = saved_env;
+}
+
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32)
+{
+    CPUX86State *saved_env;
+
+    saved_env = env;
+    env = s;
+
+    helper_frstor(ptr, data32);
+
+    env = saved_env;
+}
+#endif
+
+void helper_fxsave(target_ulong ptr, int data64)
+{
+    int fpus, fptag, i, nb_xmm_regs;
+    floatx80 tmp;
+    target_ulong addr;
+
+    /* The operand must be 16 byte aligned */
+    if (ptr & 0xf) {
+        raise_exception(env, EXCP0D_GPF);
+    }
+
+    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
+    fptag = 0;
+    for (i = 0; i < 8; i++) {
+        fptag |= (env->fptags[i] << i);
+    }
+    stw(ptr, env->fpuc);
+    stw(ptr + 2, fpus);
+    stw(ptr + 4, fptag ^ 0xff);
+#ifdef TARGET_X86_64
+    if (data64) {
+        stq(ptr + 0x08, 0); /* rip */
+        stq(ptr + 0x10, 0); /* rdp */
+    } else
+#endif
+    {
+        stl(ptr + 0x08, 0); /* eip */
+        stl(ptr + 0x0c, 0); /* sel  */
+        stl(ptr + 0x10, 0); /* dp */
+        stl(ptr + 0x14, 0); /* sel  */
+    }
+
+    addr = ptr + 0x20;
+    for (i = 0; i < 8; i++) {
+        tmp = ST(i);
+        helper_fstt(tmp, addr);
+        addr += 16;
+    }
+
+    if (env->cr[4] & CR4_OSFXSR_MASK) {
+        /* XXX: finish it */
+        stl(ptr + 0x18, env->mxcsr); /* mxcsr */
+        stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
+        if (env->hflags & HF_CS64_MASK) {
+            nb_xmm_regs = 16;
+        } else {
+            nb_xmm_regs = 8;
+        }
+        addr = ptr + 0xa0;
+        /* Fast FXSAVE leaves out the XMM registers */
+        if (!(env->efer & MSR_EFER_FFXSR)
+            || (env->hflags & HF_CPL_MASK)
+            || !(env->hflags & HF_LMA_MASK)) {
+            for (i = 0; i < nb_xmm_regs; i++) {
+                stq(addr, env->xmm_regs[i].XMM_Q(0));
+                stq(addr + 8, env->xmm_regs[i].XMM_Q(1));
+                addr += 16;
+            }
+        }
+    }
+}
+
+void helper_fxrstor(target_ulong ptr, int data64)
+{
+    int i, fpus, fptag, nb_xmm_regs;
+    floatx80 tmp;
+    target_ulong addr;
+
+    /* The operand must be 16 byte aligned */
+    if (ptr & 0xf) {
+        raise_exception(env, EXCP0D_GPF);
+    }
+
+    env->fpuc = lduw(ptr);
+    fpus = lduw(ptr + 2);
+    fptag = lduw(ptr + 4);
+    env->fpstt = (fpus >> 11) & 7;
+    env->fpus = fpus & ~0x3800;
+    fptag ^= 0xff;
+    for (i = 0; i < 8; i++) {
+        env->fptags[i] = ((fptag >> i) & 1);
+    }
+
+    addr = ptr + 0x20;
+    for (i = 0; i < 8; i++) {
+        tmp = helper_fldt(addr);
+        ST(i) = tmp;
+        addr += 16;
+    }
+
+    if (env->cr[4] & CR4_OSFXSR_MASK) {
+        /* XXX: finish it */
+        env->mxcsr = ldl(ptr + 0x18);
+        /* ldl(ptr + 0x1c); */
+        if (env->hflags & HF_CS64_MASK) {
+            nb_xmm_regs = 16;
+        } else {
+            nb_xmm_regs = 8;
+        }
+        addr = ptr + 0xa0;
+        /* Fast FXRESTORE leaves out the XMM registers */
+        if (!(env->efer & MSR_EFER_FFXSR)
+            || (env->hflags & HF_CPL_MASK)
+            || !(env->hflags & HF_LMA_MASK)) {
+            for (i = 0; i < nb_xmm_regs; i++) {
+                env->xmm_regs[i].XMM_Q(0) = ldq(addr);
+                env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
+                addr += 16;
+            }
+        }
+    }
+}
+
+void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
+{
+    CPU_LDoubleU temp;
+
+    temp.d = f;
+    *pmant = temp.l.lower;
+    *pexp = temp.l.upper;
+}
+
+floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
+{
+    CPU_LDoubleU temp;
+
+    temp.l.upper = upper;
+    temp.l.lower = mant;
+    return temp.d;
+}
+
+/* MMX/SSE */
+/* XXX: optimize by storing fptt and fptags in the static cpu state */
+
+#define SSE_DAZ             0x0040
+#define SSE_RC_MASK         0x6000
+#define SSE_RC_NEAR         0x0000
+#define SSE_RC_DOWN         0x2000
+#define SSE_RC_UP           0x4000
+#define SSE_RC_CHOP         0x6000
+#define SSE_FZ              0x8000
+
+static void update_sse_status(void)
+{
+    int rnd_type;
+
+    /* set rounding mode */
+    switch (env->mxcsr & SSE_RC_MASK) {
+    default:
+    case SSE_RC_NEAR:
+        rnd_type = float_round_nearest_even;
+        break;
+    case SSE_RC_DOWN:
+        rnd_type = float_round_down;
+        break;
+    case SSE_RC_UP:
+        rnd_type = float_round_up;
+        break;
+    case SSE_RC_CHOP:
+        rnd_type = float_round_to_zero;
+        break;
+    }
+    set_float_rounding_mode(rnd_type, &env->sse_status);
+
+    /* set denormals are zero */
+    set_flush_inputs_to_zero((env->mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
+
+    /* set flush to zero */
+    set_flush_to_zero((env->mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
+}
+
+void helper_ldmxcsr(uint32_t val)
+{
+    env->mxcsr = val;
+    update_sse_status();
+}
+
+void helper_enter_mmx(void)
+{
+    env->fpstt = 0;
+    *(uint32_t *)(env->fptags) = 0;
+    *(uint32_t *)(env->fptags + 4) = 0;
+}
+
+void helper_emms(void)
+{
+    /* set to empty state */
+    *(uint32_t *)(env->fptags) = 0x01010101;
+    *(uint32_t *)(env->fptags + 4) = 0x01010101;
+}
+
+/* XXX: suppress */
+void helper_movq(void *d, void *s)
+{
+    *(uint64_t *)d = *(uint64_t *)s;
+}
+
+#define SHIFT 0
+#include "ops_sse.h"
+
+#define SHIFT 1
+#include "ops_sse.h"
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index 8466e9d..fba7fc6 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -17,7 +17,6 @@ 
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */

-#include <math.h>
 #include "cpu.h"
 #include "dyngen-exec.h"
 #include "host-utils.h"
@@ -52,64 +51,6 @@  static inline target_long lshift(target_long x, int n)
     }
 }

-#define FPU_RC_MASK         0xc00
-#define FPU_RC_NEAR         0x000
-#define FPU_RC_DOWN         0x400
-#define FPU_RC_UP           0x800
-#define FPU_RC_CHOP         0xc00
-
-#define MAXTAN 9223372036854775808.0
-
-/* the following deal with x86 long double-precision numbers */
-#define MAXEXPD 0x7fff
-#define EXPBIAS 16383
-#define EXPD(fp)        (fp.l.upper & 0x7fff)
-#define SIGND(fp)       ((fp.l.upper) & 0x8000)
-#define MANTD(fp)       (fp.l.lower)
-#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS
-
-static inline void fpush(void)
-{
-    env->fpstt = (env->fpstt - 1) & 7;
-    env->fptags[env->fpstt] = 0; /* validate stack entry */
-}
-
-static inline void fpop(void)
-{
-    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
-    env->fpstt = (env->fpstt + 1) & 7;
-}
-
-static inline floatx80 helper_fldt(target_ulong ptr)
-{
-    CPU_LDoubleU temp;
-
-    temp.l.lower = ldq(ptr);
-    temp.l.upper = lduw(ptr + 8);
-    return temp.d;
-}
-
-static inline void helper_fstt(floatx80 f, target_ulong ptr)
-{
-    CPU_LDoubleU temp;
-
-    temp.d = f;
-    stq(ptr, temp.l.lower);
-    stw(ptr + 8, temp.l.upper);
-}
-
-#define FPUS_IE (1 << 0)
-#define FPUS_DE (1 << 1)
-#define FPUS_ZE (1 << 2)
-#define FPUS_OE (1 << 3)
-#define FPUS_UE (1 << 4)
-#define FPUS_PE (1 << 5)
-#define FPUS_SF (1 << 6)
-#define FPUS_SE (1 << 7)
-#define FPUS_B  (1 << 15)
-
-#define FPUC_EM 0x3f
-
 static inline uint32_t compute_eflags(void)
 {
     return env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK);
@@ -189,10 +130,6 @@  static const uint8_t rclb_table[32] = {
     6, 7, 8, 0, 1, 2, 3, 4,
 };

-#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
-#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
-#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
-
 /* broken thread support */

 static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
@@ -3749,1431 +3686,289 @@  void helper_verw(target_ulong selector1)
     CC_SRC = eflags | CC_Z;
 }

-/* x87 FPU helpers */
-
-static inline double floatx80_to_double(floatx80 a)
-{
-    union {
-        float64 f64;
-        double d;
-    } u;
-
-    u.f64 = floatx80_to_float64(a, &env->fp_status);
-    return u.d;
-}
-
-static inline floatx80 double_to_floatx80(double a)
-{
-    union {
-        float64 f64;
-        double d;
-    } u;
-
-    u.d = a;
-    return float64_to_floatx80(u.f64, &env->fp_status);
-}
-
-static void fpu_set_exception(int mask)
+#if defined(CONFIG_USER_ONLY)
+void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector)
 {
-    env->fpus |= mask;
-    if (env->fpus & (~env->fpuc & FPUC_EM)) {
-        env->fpus |= FPUS_SE | FPUS_B;
-    }
-}
+    CPUX86State *saved_env;

-static inline floatx80 helper_fdiv(floatx80 a, floatx80 b)
-{
-    if (floatx80_is_zero(b)) {
-        fpu_set_exception(FPUS_ZE);
+    saved_env = env;
+    env = s;
+    if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
+        selector &= 0xffff;
+        cpu_x86_load_seg_cache(env, seg_reg, selector,
+                               (selector << 4), 0xffff, 0);
+    } else {
+        helper_load_seg(seg_reg, selector);
     }
-    return floatx80_div(a, b, &env->fp_status);
+    env = saved_env;
 }
-
-static void fpu_raise_exception(void)
-{
-    if (env->cr[0] & CR0_NE_MASK) {
-        raise_exception(env, EXCP10_COPR);
-    }
-#if !defined(CONFIG_USER_ONLY)
-    else {
-        cpu_set_ferr(env);
-    }
 #endif
-}
-
-void helper_flds_FT0(uint32_t val)
-{
-    union {
-        float32 f;
-        uint32_t i;
-    } u;

-    u.i = val;
-    FT0 = float32_to_floatx80(u.f, &env->fp_status);
-}
-
-void helper_fldl_FT0(uint64_t val)
+#ifdef TARGET_X86_64
+static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
 {
-    union {
-        float64 f;
-        uint64_t i;
-    } u;
-
-    u.i = val;
-    FT0 = float64_to_floatx80(u.f, &env->fp_status);
+    *plow += a;
+    /* carry test */
+    if (*plow < a) {
+        (*phigh)++;
+    }
+    *phigh += b;
 }

-void helper_fildl_FT0(int32_t val)
+static void neg128(uint64_t *plow, uint64_t *phigh)
 {
-    FT0 = int32_to_floatx80(val, &env->fp_status);
+    *plow = ~*plow;
+    *phigh = ~*phigh;
+    add128(plow, phigh, 1, 0);
 }

-void helper_flds_ST0(uint32_t val)
+/* return TRUE if overflow */
+static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b)
 {
-    int new_fpstt;
-    union {
-        float32 f;
-        uint32_t i;
-    } u;
+    uint64_t q, r, a1, a0;
+    int i, qb, ab;

-    new_fpstt = (env->fpstt - 1) & 7;
-    u.i = val;
-    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
-    env->fpstt = new_fpstt;
-    env->fptags[new_fpstt] = 0; /* validate stack entry */
+    a0 = *plow;
+    a1 = *phigh;
+    if (a1 == 0) {
+        q = a0 / b;
+        r = a0 % b;
+        *plow = q;
+        *phigh = r;
+    } else {
+        if (a1 >= b) {
+            return 1;
+        }
+        /* XXX: use a better algorithm */
+        for (i = 0; i < 64; i++) {
+            ab = a1 >> 63;
+            a1 = (a1 << 1) | (a0 >> 63);
+            if (ab || a1 >= b) {
+                a1 -= b;
+                qb = 1;
+            } else {
+                qb = 0;
+            }
+            a0 = (a0 << 1) | qb;
+        }
+#if defined(DEBUG_MULDIV)
+        printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64
+               ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n",
+               *phigh, *plow, b, a0, a1);
+#endif
+        *plow = a0;
+        *phigh = a1;
+    }
+    return 0;
 }

-void helper_fldl_ST0(uint64_t val)
+/* return TRUE if overflow */
+static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b)
 {
-    int new_fpstt;
-    union {
-        float64 f;
-        uint64_t i;
-    } u;
+    int sa, sb;

-    new_fpstt = (env->fpstt - 1) & 7;
-    u.i = val;
-    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
-    env->fpstt = new_fpstt;
-    env->fptags[new_fpstt] = 0; /* validate stack entry */
+    sa = ((int64_t)*phigh < 0);
+    if (sa) {
+        neg128(plow, phigh);
+    }
+    sb = (b < 0);
+    if (sb) {
+        b = -b;
+    }
+    if (div64(plow, phigh, b) != 0) {
+        return 1;
+    }
+    if (sa ^ sb) {
+        if (*plow > (1ULL << 63)) {
+            return 1;
+        }
+        *plow = -*plow;
+    } else {
+        if (*plow >= (1ULL << 63)) {
+            return 1;
+        }
+    }
+    if (sa) {
+        *phigh = -*phigh;
+    }
+    return 0;
 }

-void helper_fildl_ST0(int32_t val)
+void helper_mulq_EAX_T0(target_ulong t0)
 {
-    int new_fpstt;
+    uint64_t r0, r1;

-    new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
-    env->fpstt = new_fpstt;
-    env->fptags[new_fpstt] = 0; /* validate stack entry */
+    mulu64(&r0, &r1, EAX, t0);
+    EAX = r0;
+    EDX = r1;
+    CC_DST = r0;
+    CC_SRC = r1;
 }

-void helper_fildll_ST0(int64_t val)
+void helper_imulq_EAX_T0(target_ulong t0)
 {
-    int new_fpstt;
+    uint64_t r0, r1;

-    new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
-    env->fpstt = new_fpstt;
-    env->fptags[new_fpstt] = 0; /* validate stack entry */
+    muls64(&r0, &r1, EAX, t0);
+    EAX = r0;
+    EDX = r1;
+    CC_DST = r0;
+    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
 }

-uint32_t helper_fsts_ST0(void)
+target_ulong helper_imulq_T0_T1(target_ulong t0, target_ulong t1)
 {
-    union {
-        float32 f;
-        uint32_t i;
-    } u;
+    uint64_t r0, r1;

-    u.f = floatx80_to_float32(ST0, &env->fp_status);
-    return u.i;
+    muls64(&r0, &r1, t0, t1);
+    CC_DST = r0;
+    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
+    return r0;
 }

-uint64_t helper_fstl_ST0(void)
+void helper_divq_EAX(target_ulong t0)
 {
-    union {
-        float64 f;
-        uint64_t i;
-    } u;
+    uint64_t r0, r1;

-    u.f = floatx80_to_float64(ST0, &env->fp_status);
-    return u.i;
+    if (t0 == 0) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    r0 = EAX;
+    r1 = EDX;
+    if (div64(&r0, &r1, t0)) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    EAX = r0;
+    EDX = r1;
 }

-int32_t helper_fist_ST0(void)
+void helper_idivq_EAX(target_ulong t0)
 {
-    int32_t val;
+    uint64_t r0, r1;

-    val = floatx80_to_int32(ST0, &env->fp_status);
-    if (val != (int16_t)val) {
-        val = -32768;
+    if (t0 == 0) {
+        raise_exception(env, EXCP00_DIVZ);
     }
-    return val;
+    r0 = EAX;
+    r1 = EDX;
+    if (idiv64(&r0, &r1, t0)) {
+        raise_exception(env, EXCP00_DIVZ);
+    }
+    EAX = r0;
+    EDX = r1;
 }
+#endif

-int32_t helper_fistl_ST0(void)
+static void do_hlt(void)
 {
-    int32_t val;
-
-    val = floatx80_to_int32(ST0, &env->fp_status);
-    return val;
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
+    env->halted = 1;
+    env->exception_index = EXCP_HLT;
+    cpu_loop_exit(env);
 }

-int64_t helper_fistll_ST0(void)
+void helper_hlt(int next_eip_addend)
 {
-    int64_t val;
+    helper_svm_check_intercept_param(SVM_EXIT_HLT, 0);
+    EIP += next_eip_addend;

-    val = floatx80_to_int64(ST0, &env->fp_status);
-    return val;
+    do_hlt();
 }

-int32_t helper_fistt_ST0(void)
+void helper_monitor(target_ulong ptr)
 {
-    int32_t val;
-
-    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
-    if (val != (int16_t)val) {
-        val = -32768;
+    if ((uint32_t)ECX != 0) {
+        raise_exception(env, EXCP0D_GPF);
     }
-    return val;
+    /* XXX: store address? */
+    helper_svm_check_intercept_param(SVM_EXIT_MONITOR, 0);
 }

-int32_t helper_fisttl_ST0(void)
+void helper_mwait(int next_eip_addend)
 {
-    int32_t val;
+    if ((uint32_t)ECX != 0) {
+        raise_exception(env, EXCP0D_GPF);
+    }
+    helper_svm_check_intercept_param(SVM_EXIT_MWAIT, 0);
+    EIP += next_eip_addend;

-    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
-    return val;
+    /* XXX: not complete but not completely erroneous */
+    if (env->cpu_index != 0 || env->next_cpu != NULL) {
+        /* more than one CPU: do not sleep because another CPU may
+           wake this one */
+    } else {
+        do_hlt();
+    }
 }

-int64_t helper_fisttll_ST0(void)
+void helper_debug(void)
 {
-    int64_t val;
-
-    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
-    return val;
+    env->exception_index = EXCP_DEBUG;
+    cpu_loop_exit(env);
 }

-void helper_fldt_ST0(target_ulong ptr)
+void helper_reset_rf(void)
 {
-    int new_fpstt;
-
-    new_fpstt = (env->fpstt - 1) & 7;
-    env->fpregs[new_fpstt].d = helper_fldt(ptr);
-    env->fpstt = new_fpstt;
-    env->fptags[new_fpstt] = 0; /* validate stack entry */
+    env->eflags &= ~RF_MASK;
 }

-void helper_fstt_ST0(target_ulong ptr)
+void helper_cli(void)
 {
-    helper_fstt(ST0, ptr);
+    env->eflags &= ~IF_MASK;
 }

-void helper_fpush(void)
+void helper_sti(void)
 {
-    fpush();
+    env->eflags |= IF_MASK;
 }

-void helper_fpop(void)
+#if 0
+/* vm86plus instructions */
+void helper_cli_vm(void)
 {
-    fpop();
+    env->eflags &= ~VIF_MASK;
 }

-void helper_fdecstp(void)
+void helper_sti_vm(void)
 {
-    env->fpstt = (env->fpstt - 1) & 7;
-    env->fpus &= ~0x4700;
+    env->eflags |= VIF_MASK;
+    if (env->eflags & VIP_MASK) {
+        raise_exception(env, EXCP0D_GPF);
+    }
 }
+#endif

-void helper_fincstp(void)
+void helper_set_inhibit_irq(void)
 {
-    env->fpstt = (env->fpstt + 1) & 7;
-    env->fpus &= ~0x4700;
+    env->hflags |= HF_INHIBIT_IRQ_MASK;
 }

-/* FPU move */
-
-void helper_ffree_STN(int st_index)
+void helper_reset_inhibit_irq(void)
 {
-    env->fptags[(env->fpstt + st_index) & 7] = 1;
+    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
 }

-void helper_fmov_ST0_FT0(void)
+void helper_boundw(target_ulong a0, int v)
 {
-    ST0 = FT0;
-}
+    int low, high;

-void helper_fmov_FT0_STN(int st_index)
-{
-    FT0 = ST(st_index);
+    low = ldsw(a0);
+    high = ldsw(a0 + 2);
+    v = (int16_t)v;
+    if (v < low || v > high) {
+        raise_exception(env, EXCP05_BOUND);
+    }
 }

-void helper_fmov_ST0_STN(int st_index)
+void helper_boundl(target_ulong a0, int v)
 {
-    ST0 = ST(st_index);
-}
+    int low, high;

-void helper_fmov_STN_ST0(int st_index)
-{
-    ST(st_index) = ST0;
-}
-
-void helper_fxchg_ST0_STN(int st_index)
-{
-    floatx80 tmp;
-
-    tmp = ST(st_index);
-    ST(st_index) = ST0;
-    ST0 = tmp;
-}
-
-/* FPU operations */
-
-static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
-
-void helper_fcom_ST0_FT0(void)
-{
-    int ret;
-
-    ret = floatx80_compare(ST0, FT0, &env->fp_status);
-    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
-}
-
-void helper_fucom_ST0_FT0(void)
-{
-    int ret;
-
-    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
-    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
-}
-
-static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
-
-void helper_fcomi_ST0_FT0(void)
-{
-    int eflags;
-    int ret;
-
-    ret = floatx80_compare(ST0, FT0, &env->fp_status);
-    eflags = helper_cc_compute_all(CC_OP);
-    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
-    CC_SRC = eflags;
-}
-
-void helper_fucomi_ST0_FT0(void)
-{
-    int eflags;
-    int ret;
-
-    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
-    eflags = helper_cc_compute_all(CC_OP);
-    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
-    CC_SRC = eflags;
-}
-
-void helper_fadd_ST0_FT0(void)
-{
-    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
-}
-
-void helper_fmul_ST0_FT0(void)
-{
-    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
-}
-
-void helper_fsub_ST0_FT0(void)
-{
-    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
-}
-
-void helper_fsubr_ST0_FT0(void)
-{
-    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
-}
-
-void helper_fdiv_ST0_FT0(void)
-{
-    ST0 = helper_fdiv(ST0, FT0);
-}
-
-void helper_fdivr_ST0_FT0(void)
-{
-    ST0 = helper_fdiv(FT0, ST0);
-}
-
-/* fp operations between STN and ST0 */
-
-void helper_fadd_STN_ST0(int st_index)
-{
-    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
-}
-
-void helper_fmul_STN_ST0(int st_index)
-{
-    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
-}
-
-void helper_fsub_STN_ST0(int st_index)
-{
-    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
-}
-
-void helper_fsubr_STN_ST0(int st_index)
-{
-    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
-}
-
-void helper_fdiv_STN_ST0(int st_index)
-{
-    floatx80 *p;
-
-    p = &ST(st_index);
-    *p = helper_fdiv(*p, ST0);
-}
-
-void helper_fdivr_STN_ST0(int st_index)
-{
-    floatx80 *p;
-
-    p = &ST(st_index);
-    *p = helper_fdiv(ST0, *p);
-}
-
-/* misc FPU operations */
-void helper_fchs_ST0(void)
-{
-    ST0 = floatx80_chs(ST0);
-}
-
-void helper_fabs_ST0(void)
-{
-    ST0 = floatx80_abs(ST0);
-}
-
-void helper_fld1_ST0(void)
-{
-    ST0 = floatx80_one;
-}
-
-void helper_fldl2t_ST0(void)
-{
-    ST0 = floatx80_l2t;
-}
-
-void helper_fldl2e_ST0(void)
-{
-    ST0 = floatx80_l2e;
-}
-
-void helper_fldpi_ST0(void)
-{
-    ST0 = floatx80_pi;
-}
-
-void helper_fldlg2_ST0(void)
-{
-    ST0 = floatx80_lg2;
-}
-
-void helper_fldln2_ST0(void)
-{
-    ST0 = floatx80_ln2;
-}
-
-void helper_fldz_ST0(void)
-{
-    ST0 = floatx80_zero;
-}
-
-void helper_fldz_FT0(void)
-{
-    FT0 = floatx80_zero;
-}
-
-uint32_t helper_fnstsw(void)
-{
-    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
-}
-
-uint32_t helper_fnstcw(void)
-{
-    return env->fpuc;
-}
-
-static void update_fp_status(void)
-{
-    int rnd_type;
-
-    /* set rounding mode */
-    switch (env->fpuc & FPU_RC_MASK) {
-    default:
-    case FPU_RC_NEAR:
-        rnd_type = float_round_nearest_even;
-        break;
-    case FPU_RC_DOWN:
-        rnd_type = float_round_down;
-        break;
-    case FPU_RC_UP:
-        rnd_type = float_round_up;
-        break;
-    case FPU_RC_CHOP:
-        rnd_type = float_round_to_zero;
-        break;
-    }
-    set_float_rounding_mode(rnd_type, &env->fp_status);
-    switch ((env->fpuc >> 8) & 3) {
-    case 0:
-        rnd_type = 32;
-        break;
-    case 2:
-        rnd_type = 64;
-        break;
-    case 3:
-    default:
-        rnd_type = 80;
-        break;
-    }
-    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
-}
-
-void helper_fldcw(uint32_t val)
-{
-    env->fpuc = val;
-    update_fp_status();
-}
-
-void helper_fclex(void)
-{
-    env->fpus &= 0x7f00;
-}
-
-void helper_fwait(void)
-{
-    if (env->fpus & FPUS_SE) {
-        fpu_raise_exception();
-    }
-}
-
-void helper_fninit(void)
-{
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
-}
-
-/* BCD ops */
-
-void helper_fbld_ST0(target_ulong ptr)
-{
-    floatx80 tmp;
-    uint64_t val;
-    unsigned int v;
-    int i;
-
-    val = 0;
-    for (i = 8; i >= 0; i--) {
-        v = ldub(ptr + i);
-        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
-    }
-    tmp = int64_to_floatx80(val, &env->fp_status);
-    if (ldub(ptr + 9) & 0x80) {
-        floatx80_chs(tmp);
-    }
-    fpush();
-    ST0 = tmp;
-}
-
-void helper_fbst_ST0(target_ulong ptr)
-{
-    int v;
-    target_ulong mem_ref, mem_end;
-    int64_t val;
-
-    val = floatx80_to_int64(ST0, &env->fp_status);
-    mem_ref = ptr;
-    mem_end = mem_ref + 9;
-    if (val < 0) {
-        stb(mem_end, 0x80);
-        val = -val;
-    } else {
-        stb(mem_end, 0x00);
-    }
-    while (mem_ref < mem_end) {
-        if (val == 0) {
-            break;
-        }
-        v = val % 100;
-        val = val / 100;
-        v = ((v / 10) << 4) | (v % 10);
-        stb(mem_ref++, v);
-    }
-    while (mem_ref < mem_end) {
-        stb(mem_ref++, 0);
-    }
-}
-
-void helper_f2xm1(void)
-{
-    double val = floatx80_to_double(ST0);
-
-    val = pow(2.0, val) - 1.0;
-    ST0 = double_to_floatx80(val);
-}
-
-void helper_fyl2x(void)
-{
-    double fptemp = floatx80_to_double(ST0);
-
-    if (fptemp > 0.0) {
-        fptemp = log(fptemp) / log(2.0); /* log2(ST) */
-        fptemp *= floatx80_to_double(ST1);
-        ST1 = double_to_floatx80(fptemp);
-        fpop();
-    } else {
-        env->fpus &= ~0x4700;
-        env->fpus |= 0x400;
-    }
-}
-
-void helper_fptan(void)
-{
-    double fptemp = floatx80_to_double(ST0);
-
-    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
-        env->fpus |= 0x400;
-    } else {
-        fptemp = tan(fptemp);
-        ST0 = double_to_floatx80(fptemp);
-        fpush();
-        ST0 = floatx80_one;
-        env->fpus &= ~0x400; /* C2 <-- 0 */
-        /* the above code is for |arg| < 2**52 only */
-    }
-}
-
-void helper_fpatan(void)
-{
-    double fptemp, fpsrcop;
-
-    fpsrcop = floatx80_to_double(ST1);
-    fptemp = floatx80_to_double(ST0);
-    ST1 = double_to_floatx80(atan2(fpsrcop, fptemp));
-    fpop();
-}
-
-void helper_fxtract(void)
-{
-    CPU_LDoubleU temp;
-
-    temp.d = ST0;
-
-    if (floatx80_is_zero(ST0)) {
-        /* Easy way to generate -inf and raising division by 0 exception */
-        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
-                           &env->fp_status);
-        fpush();
-        ST0 = temp.d;
-    } else {
-        int expdif;
-
-        expdif = EXPD(temp) - EXPBIAS;
-        /* DP exponent bias */
-        ST0 = int32_to_floatx80(expdif, &env->fp_status);
-        fpush();
-        BIASEXPONENT(temp);
-        ST0 = temp.d;
-    }
-}
-
-void helper_fprem1(void)
-{
-    double st0, st1, dblq, fpsrcop, fptemp;
-    CPU_LDoubleU fpsrcop1, fptemp1;
-    int expdif;
-    signed long long int q;
-
-    st0 = floatx80_to_double(ST0);
-    st1 = floatx80_to_double(ST1);
-
-    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
-        ST0 = double_to_floatx80(0.0 / 0.0); /* NaN */
-        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
-        return;
-    }
-
-    fpsrcop = st0;
-    fptemp = st1;
-    fpsrcop1.d = ST0;
-    fptemp1.d = ST1;
-    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
-
-    if (expdif < 0) {
-        /* optimisation? taken from the AMD docs */
-        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
-        /* ST0 is unchanged */
-        return;
-    }
-
-    if (expdif < 53) {
-        dblq = fpsrcop / fptemp;
-        /* round dblq towards nearest integer */
-        dblq = rint(dblq);
-        st0 = fpsrcop - fptemp * dblq;
-
-        /* convert dblq to q by truncating towards zero */
-        if (dblq < 0.0) {
-            q = (signed long long int)(-dblq);
-        } else {
-            q = (signed long long int)dblq;
-        }
-
-        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
-        /* (C0,C3,C1) <-- (q2,q1,q0) */
-        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
-        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
-        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
-    } else {
-        env->fpus |= 0x400;  /* C2 <-- 1 */
-        fptemp = pow(2.0, expdif - 50);
-        fpsrcop = (st0 / st1) / fptemp;
-        /* fpsrcop = integer obtained by chopping */
-        fpsrcop = (fpsrcop < 0.0) ?
-                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
-        st0 -= (st1 * fpsrcop * fptemp);
-    }
-    ST0 = double_to_floatx80(st0);
-}
-
-void helper_fprem(void)
-{
-    double st0, st1, dblq, fpsrcop, fptemp;
-    CPU_LDoubleU fpsrcop1, fptemp1;
-    int expdif;
-    signed long long int q;
-
-    st0 = floatx80_to_double(ST0);
-    st1 = floatx80_to_double(ST1);
-
-    if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
-        ST0 = double_to_floatx80(0.0 / 0.0); /* NaN */
-        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
-        return;
-    }
-
-    fpsrcop = st0;
-    fptemp = st1;
-    fpsrcop1.d = ST0;
-    fptemp1.d = ST1;
-    expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
-
-    if (expdif < 0) {
-        /* optimisation? taken from the AMD docs */
-        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
-        /* ST0 is unchanged */
-        return;
-    }
-
-    if (expdif < 53) {
-        dblq = fpsrcop / fptemp; /* ST0 / ST1 */
-        /* round dblq towards zero */
-        dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
-        st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
-
-        /* convert dblq to q by truncating towards zero */
-        if (dblq < 0.0) {
-            q = (signed long long int)(-dblq);
-        } else {
-            q = (signed long long int)dblq;
-        }
-
-        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
-        /* (C0,C3,C1) <-- (q2,q1,q0) */
-        env->fpus |= (q & 0x4) << (8 - 2);  /* (C0) <-- q2 */
-        env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
-        env->fpus |= (q & 0x1) << (9 - 0);  /* (C1) <-- q0 */
-    } else {
-        int N = 32 + (expdif % 32); /* as per AMD docs */
-
-        env->fpus |= 0x400;  /* C2 <-- 1 */
-        fptemp = pow(2.0, (double)(expdif - N));
-        fpsrcop = (st0 / st1) / fptemp;
-        /* fpsrcop = integer obtained by chopping */
-        fpsrcop = (fpsrcop < 0.0) ?
-                  -(floor(fabs(fpsrcop))) : floor(fpsrcop);
-        st0 -= (st1 * fpsrcop * fptemp);
-    }
-    ST0 = double_to_floatx80(st0);
-}
-
-void helper_fyl2xp1(void)
-{
-    double fptemp = floatx80_to_double(ST0);
-
-    if ((fptemp + 1.0) > 0.0) {
-        fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
-        fptemp *= floatx80_to_double(ST1);
-        ST1 = double_to_floatx80(fptemp);
-        fpop();
-    } else {
-        env->fpus &= ~0x4700;
-        env->fpus |= 0x400;
-    }
-}
-
-void helper_fsqrt(void)
-{
-    if (floatx80_is_neg(ST0)) {
-        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
-        env->fpus |= 0x400;
-    }
-    ST0 = floatx80_sqrt(ST0, &env->fp_status);
-}
-
-void helper_fsincos(void)
-{
-    double fptemp = floatx80_to_double(ST0);
-
-    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
-        env->fpus |= 0x400;
-    } else {
-        ST0 = double_to_floatx80(sin(fptemp));
-        fpush();
-        ST0 = double_to_floatx80(cos(fptemp));
-        env->fpus &= ~0x400;  /* C2 <-- 0 */
-        /* the above code is for |arg| < 2**63 only */
-    }
-}
-
-void helper_frndint(void)
-{
-    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
-}
-
-void helper_fscale(void)
-{
-    if (floatx80_is_any_nan(ST1)) {
-        ST0 = ST1;
-    } else {
-        int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
-        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
-    }
-}
-
-void helper_fsin(void)
-{
-    double fptemp = floatx80_to_double(ST0);
-
-    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
-        env->fpus |= 0x400;
-    } else {
-        ST0 = double_to_floatx80(sin(fptemp));
-        env->fpus &= ~0x400;  /* C2 <-- 0 */
-        /* the above code is for |arg| < 2**53 only */
-    }
-}
-
-void helper_fcos(void)
-{
-    double fptemp = floatx80_to_double(ST0);
-
-    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
-        env->fpus |= 0x400;
-    } else {
-        ST0 = double_to_floatx80(cos(fptemp));
-        env->fpus &= ~0x400;  /* C2 <-- 0 */
-        /* the above code is for |arg| < 2**63 only */
-    }
-}
-
-void helper_fxam_ST0(void)
-{
-    CPU_LDoubleU temp;
-    int expdif;
-
-    temp.d = ST0;
-
-    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
-    if (SIGND(temp)) {
-        env->fpus |= 0x200; /* C1 <-- 1 */
-    }
-
-    /* XXX: test fptags too */
-    expdif = EXPD(temp);
-    if (expdif == MAXEXPD) {
-        if (MANTD(temp) == 0x8000000000000000ULL) {
-            env->fpus |= 0x500; /* Infinity */
-        } else {
-            env->fpus |= 0x100; /* NaN */
-        }
-    } else if (expdif == 0) {
-        if (MANTD(temp) == 0) {
-            env->fpus |=  0x4000; /* Zero */
-        } else {
-            env->fpus |= 0x4400; /* Denormal */
-        }
-    } else {
-        env->fpus |= 0x400;
-    }
-}
-
-void helper_fstenv(target_ulong ptr, int data32)
-{
-    int fpus, fptag, exp, i;
-    uint64_t mant;
-    CPU_LDoubleU tmp;
-
-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
-    fptag = 0;
-    for (i = 7; i >= 0; i--) {
-        fptag <<= 2;
-        if (env->fptags[i]) {
-            fptag |= 3;
-        } else {
-            tmp.d = env->fpregs[i].d;
-            exp = EXPD(tmp);
-            mant = MANTD(tmp);
-            if (exp == 0 && mant == 0) {
-                /* zero */
-                fptag |= 1;
-            } else if (exp == 0 || exp == MAXEXPD
-                       || (mant & (1LL << 63)) == 0) {
-                /* NaNs, infinity, denormal */
-                fptag |= 2;
-            }
-        }
-    }
-    if (data32) {
-        /* 32 bit */
-        stl(ptr, env->fpuc);
-        stl(ptr + 4, fpus);
-        stl(ptr + 8, fptag);
-        stl(ptr + 12, 0); /* fpip */
-        stl(ptr + 16, 0); /* fpcs */
-        stl(ptr + 20, 0); /* fpoo */
-        stl(ptr + 24, 0); /* fpos */
-    } else {
-        /* 16 bit */
-        stw(ptr, env->fpuc);
-        stw(ptr + 2, fpus);
-        stw(ptr + 4, fptag);
-        stw(ptr + 6, 0);
-        stw(ptr + 8, 0);
-        stw(ptr + 10, 0);
-        stw(ptr + 12, 0);
-    }
-}
-
-void helper_fldenv(target_ulong ptr, int data32)
-{
-    int i, fpus, fptag;
-
-    if (data32) {
-        env->fpuc = lduw(ptr);
-        fpus = lduw(ptr + 4);
-        fptag = lduw(ptr + 8);
-    } else {
-        env->fpuc = lduw(ptr);
-        fpus = lduw(ptr + 2);
-        fptag = lduw(ptr + 4);
-    }
-    env->fpstt = (fpus >> 11) & 7;
-    env->fpus = fpus & ~0x3800;
-    for (i = 0; i < 8; i++) {
-        env->fptags[i] = ((fptag & 3) == 3);
-        fptag >>= 2;
-    }
-}
-
-void helper_fsave(target_ulong ptr, int data32)
-{
-    floatx80 tmp;
-    int i;
-
-    helper_fstenv(ptr, data32);
-
-    ptr += (14 << data32);
-    for (i = 0; i < 8; i++) {
-        tmp = ST(i);
-        helper_fstt(tmp, ptr);
-        ptr += 10;
-    }
-
-    /* fninit */
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
-}
-
-void helper_frstor(target_ulong ptr, int data32)
-{
-    floatx80 tmp;
-    int i;
-
-    helper_fldenv(ptr, data32);
-    ptr += (14 << data32);
-
-    for (i = 0; i < 8; i++) {
-        tmp = helper_fldt(ptr);
-        ST(i) = tmp;
-        ptr += 10;
-    }
-}
-
-
-#if defined(CONFIG_USER_ONLY)
-void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector)
-{
-    CPUX86State *saved_env;
-
-    saved_env = env;
-    env = s;
-    if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) {
-        selector &= 0xffff;
-        cpu_x86_load_seg_cache(env, seg_reg, selector,
-                               (selector << 4), 0xffff, 0);
-    } else {
-        helper_load_seg(seg_reg, selector);
-    }
-    env = saved_env;
-}
-
-void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32)
-{
-    CPUX86State *saved_env;
-
-    saved_env = env;
-    env = s;
-
-    helper_fsave(ptr, data32);
-
-    env = saved_env;
-}
-
-void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32)
-{
-    CPUX86State *saved_env;
-
-    saved_env = env;
-    env = s;
-
-    helper_frstor(ptr, data32);
-
-    env = saved_env;
-}
-#endif
-
-void helper_fxsave(target_ulong ptr, int data64)
-{
-    int fpus, fptag, i, nb_xmm_regs;
-    floatx80 tmp;
-    target_ulong addr;
-
-    /* The operand must be 16 byte aligned */
-    if (ptr & 0xf) {
-        raise_exception(env, EXCP0D_GPF);
-    }
-
-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
-    fptag = 0;
-    for (i = 0; i < 8; i++) {
-        fptag |= (env->fptags[i] << i);
-    }
-    stw(ptr, env->fpuc);
-    stw(ptr + 2, fpus);
-    stw(ptr + 4, fptag ^ 0xff);
-#ifdef TARGET_X86_64
-    if (data64) {
-        stq(ptr + 0x08, 0); /* rip */
-        stq(ptr + 0x10, 0); /* rdp */
-    } else
-#endif
-    {
-        stl(ptr + 0x08, 0); /* eip */
-        stl(ptr + 0x0c, 0); /* sel  */
-        stl(ptr + 0x10, 0); /* dp */
-        stl(ptr + 0x14, 0); /* sel  */
-    }
-
-    addr = ptr + 0x20;
-    for (i = 0; i < 8; i++) {
-        tmp = ST(i);
-        helper_fstt(tmp, addr);
-        addr += 16;
-    }
-
-    if (env->cr[4] & CR4_OSFXSR_MASK) {
-        /* XXX: finish it */
-        stl(ptr + 0x18, env->mxcsr); /* mxcsr */
-        stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */
-        if (env->hflags & HF_CS64_MASK) {
-            nb_xmm_regs = 16;
-        } else {
-            nb_xmm_regs = 8;
-        }
-        addr = ptr + 0xa0;
-        /* Fast FXSAVE leaves out the XMM registers */
-        if (!(env->efer & MSR_EFER_FFXSR)
-            || (env->hflags & HF_CPL_MASK)
-            || !(env->hflags & HF_LMA_MASK)) {
-            for (i = 0; i < nb_xmm_regs; i++) {
-                stq(addr, env->xmm_regs[i].XMM_Q(0));
-                stq(addr + 8, env->xmm_regs[i].XMM_Q(1));
-                addr += 16;
-            }
-        }
-    }
-}
-
-void helper_fxrstor(target_ulong ptr, int data64)
-{
-    int i, fpus, fptag, nb_xmm_regs;
-    floatx80 tmp;
-    target_ulong addr;
-
-    /* The operand must be 16 byte aligned */
-    if (ptr & 0xf) {
-        raise_exception(env, EXCP0D_GPF);
-    }
-
-    env->fpuc = lduw(ptr);
-    fpus = lduw(ptr + 2);
-    fptag = lduw(ptr + 4);
-    env->fpstt = (fpus >> 11) & 7;
-    env->fpus = fpus & ~0x3800;
-    fptag ^= 0xff;
-    for (i = 0; i < 8; i++) {
-        env->fptags[i] = ((fptag >> i) & 1);
-    }
-
-    addr = ptr + 0x20;
-    for (i = 0; i < 8; i++) {
-        tmp = helper_fldt(addr);
-        ST(i) = tmp;
-        addr += 16;
-    }
-
-    if (env->cr[4] & CR4_OSFXSR_MASK) {
-        /* XXX: finish it */
-        env->mxcsr = ldl(ptr + 0x18);
-        /* ldl(ptr + 0x1c); */
-        if (env->hflags & HF_CS64_MASK) {
-            nb_xmm_regs = 16;
-        } else {
-            nb_xmm_regs = 8;
-        }
-        addr = ptr + 0xa0;
-        /* Fast FXRESTORE leaves out the XMM registers */
-        if (!(env->efer & MSR_EFER_FFXSR)
-            || (env->hflags & HF_CPL_MASK)
-            || !(env->hflags & HF_LMA_MASK)) {
-            for (i = 0; i < nb_xmm_regs; i++) {
-                env->xmm_regs[i].XMM_Q(0) = ldq(addr);
-                env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8);
-                addr += 16;
-            }
-        }
-    }
-}
-
-void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
-{
-    CPU_LDoubleU temp;
-
-    temp.d = f;
-    *pmant = temp.l.lower;
-    *pexp = temp.l.upper;
-}
-
-floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper)
-{
-    CPU_LDoubleU temp;
-
-    temp.l.upper = upper;
-    temp.l.lower = mant;
-    return temp.d;
-}
-
-#ifdef TARGET_X86_64
-static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
-{
-    *plow += a;
-    /* carry test */
-    if (*plow < a) {
-        (*phigh)++;
-    }
-    *phigh += b;
-}
-
-static void neg128(uint64_t *plow, uint64_t *phigh)
-{
-    *plow = ~*plow;
-    *phigh = ~*phigh;
-    add128(plow, phigh, 1, 0);
-}
-
-/* return TRUE if overflow */
-static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b)
-{
-    uint64_t q, r, a1, a0;
-    int i, qb, ab;
-
-    a0 = *plow;
-    a1 = *phigh;
-    if (a1 == 0) {
-        q = a0 / b;
-        r = a0 % b;
-        *plow = q;
-        *phigh = r;
-    } else {
-        if (a1 >= b) {
-            return 1;
-        }
-        /* XXX: use a better algorithm */
-        for (i = 0; i < 64; i++) {
-            ab = a1 >> 63;
-            a1 = (a1 << 1) | (a0 >> 63);
-            if (ab || a1 >= b) {
-                a1 -= b;
-                qb = 1;
-            } else {
-                qb = 0;
-            }
-            a0 = (a0 << 1) | qb;
-        }
-#if defined(DEBUG_MULDIV)
-        printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64
-               ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n",
-               *phigh, *plow, b, a0, a1);
-#endif
-        *plow = a0;
-        *phigh = a1;
-    }
-    return 0;
-}
-
-/* return TRUE if overflow */
-static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b)
-{
-    int sa, sb;
-
-    sa = ((int64_t)*phigh < 0);
-    if (sa) {
-        neg128(plow, phigh);
-    }
-    sb = (b < 0);
-    if (sb) {
-        b = -b;
-    }
-    if (div64(plow, phigh, b) != 0) {
-        return 1;
-    }
-    if (sa ^ sb) {
-        if (*plow > (1ULL << 63)) {
-            return 1;
-        }
-        *plow = -*plow;
-    } else {
-        if (*plow >= (1ULL << 63)) {
-            return 1;
-        }
-    }
-    if (sa) {
-        *phigh = -*phigh;
-    }
-    return 0;
-}
-
-void helper_mulq_EAX_T0(target_ulong t0)
-{
-    uint64_t r0, r1;
-
-    mulu64(&r0, &r1, EAX, t0);
-    EAX = r0;
-    EDX = r1;
-    CC_DST = r0;
-    CC_SRC = r1;
-}
-
-void helper_imulq_EAX_T0(target_ulong t0)
-{
-    uint64_t r0, r1;
-
-    muls64(&r0, &r1, EAX, t0);
-    EAX = r0;
-    EDX = r1;
-    CC_DST = r0;
-    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
-}
-
-target_ulong helper_imulq_T0_T1(target_ulong t0, target_ulong t1)
-{
-    uint64_t r0, r1;
-
-    muls64(&r0, &r1, t0, t1);
-    CC_DST = r0;
-    CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
-    return r0;
-}
-
-void helper_divq_EAX(target_ulong t0)
-{
-    uint64_t r0, r1;
-
-    if (t0 == 0) {
-        raise_exception(env, EXCP00_DIVZ);
-    }
-    r0 = EAX;
-    r1 = EDX;
-    if (div64(&r0, &r1, t0)) {
-        raise_exception(env, EXCP00_DIVZ);
-    }
-    EAX = r0;
-    EDX = r1;
-}
-
-void helper_idivq_EAX(target_ulong t0)
-{
-    uint64_t r0, r1;
-
-    if (t0 == 0) {
-        raise_exception(env, EXCP00_DIVZ);
-    }
-    r0 = EAX;
-    r1 = EDX;
-    if (idiv64(&r0, &r1, t0)) {
-        raise_exception(env, EXCP00_DIVZ);
-    }
-    EAX = r0;
-    EDX = r1;
-}
-#endif
-
-static void do_hlt(void)
-{
-    env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */
-    env->halted = 1;
-    env->exception_index = EXCP_HLT;
-    cpu_loop_exit(env);
-}
-
-void helper_hlt(int next_eip_addend)
-{
-    helper_svm_check_intercept_param(SVM_EXIT_HLT, 0);
-    EIP += next_eip_addend;
-
-    do_hlt();
-}
-
-void helper_monitor(target_ulong ptr)
-{
-    if ((uint32_t)ECX != 0) {
-        raise_exception(env, EXCP0D_GPF);
-    }
-    /* XXX: store address? */
-    helper_svm_check_intercept_param(SVM_EXIT_MONITOR, 0);
-}
-
-void helper_mwait(int next_eip_addend)
-{
-    if ((uint32_t)ECX != 0) {
-        raise_exception(env, EXCP0D_GPF);
-    }
-    helper_svm_check_intercept_param(SVM_EXIT_MWAIT, 0);
-    EIP += next_eip_addend;
-
-    /* XXX: not complete but not completely erroneous */
-    if (env->cpu_index != 0 || env->next_cpu != NULL) {
-        /* more than one CPU: do not sleep because another CPU may
-           wake this one */
-    } else {
-        do_hlt();
-    }
-}
-
-void helper_debug(void)
-{
-    env->exception_index = EXCP_DEBUG;
-    cpu_loop_exit(env);
-}
-
-void helper_reset_rf(void)
-{
-    env->eflags &= ~RF_MASK;
-}
-
-void helper_cli(void)
-{
-    env->eflags &= ~IF_MASK;
-}
-
-void helper_sti(void)
-{
-    env->eflags |= IF_MASK;
-}
-
-#if 0
-/* vm86plus instructions */
-void helper_cli_vm(void)
-{
-    env->eflags &= ~VIF_MASK;
-}
-
-void helper_sti_vm(void)
-{
-    env->eflags |= VIF_MASK;
-    if (env->eflags & VIP_MASK) {
-        raise_exception(env, EXCP0D_GPF);
-    }
-}
-#endif
-
-void helper_set_inhibit_irq(void)
-{
-    env->hflags |= HF_INHIBIT_IRQ_MASK;
-}
-
-void helper_reset_inhibit_irq(void)
-{
-    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
-}
-
-void helper_boundw(target_ulong a0, int v)
-{
-    int low, high;
-
-    low = ldsw(a0);
-    high = ldsw(a0 + 2);
-    v = (int16_t)v;
-    if (v < low || v > high) {
-        raise_exception(env, EXCP05_BOUND);
-    }
-}
-
-void helper_boundl(target_ulong a0, int v)
-{
-    int low, high;
-
-    low = ldl(a0);
-    high = ldl(a0 + 4);
-    if (v < low || v > high) {
-        raise_exception(env, EXCP05_BOUND);
-    }
+    low = ldl(a0);
+    high = ldl(a0 + 4);
+    if (v < low || v > high) {
+        raise_exception(env, EXCP05_BOUND);
+    }
 }

 #if !defined(CONFIG_USER_ONLY)
@@ -5918,78 +4713,6 @@  void cpu_vmexit(CPUX86State *nenv, uint32_t
exit_code, uint64_t exit_info_1)

 #endif

-/* MMX/SSE */
-/* XXX: optimize by storing fptt and fptags in the static cpu state */
-
-#define SSE_DAZ             0x0040
-#define SSE_RC_MASK         0x6000
-#define SSE_RC_NEAR         0x0000
-#define SSE_RC_DOWN         0x2000
-#define SSE_RC_UP           0x4000
-#define SSE_RC_CHOP         0x6000
-#define SSE_FZ              0x8000
-
-static void update_sse_status(void)
-{
-    int rnd_type;
-
-    /* set rounding mode */
-    switch (env->mxcsr & SSE_RC_MASK) {
-    default:
-    case SSE_RC_NEAR:
-        rnd_type = float_round_nearest_even;
-        break;
-    case SSE_RC_DOWN:
-        rnd_type = float_round_down;
-        break;
-    case SSE_RC_UP:
-        rnd_type = float_round_up;
-        break;
-    case SSE_RC_CHOP:
-        rnd_type = float_round_to_zero;
-        break;
-    }
-    set_float_rounding_mode(rnd_type, &env->sse_status);
-
-    /* set denormals are zero */
-    set_flush_inputs_to_zero((env->mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
-
-    /* set flush to zero */
-    set_flush_to_zero((env->mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
-}
-
-void helper_ldmxcsr(uint32_t val)
-{
-    env->mxcsr = val;
-    update_sse_status();
-}
-
-void helper_enter_mmx(void)
-{
-    env->fpstt = 0;
-    *(uint32_t *)(env->fptags) = 0;
-    *(uint32_t *)(env->fptags + 4) = 0;
-}
-
-void helper_emms(void)
-{
-    /* set to empty state */
-    *(uint32_t *)(env->fptags) = 0x01010101;
-    *(uint32_t *)(env->fptags + 4) = 0x01010101;
-}
-
-/* XXX: suppress */
-void helper_movq(void *d, void *s)
-{
-    *(uint64_t *)d = *(uint64_t *)s;
-}
-
-#define SHIFT 0
-#include "ops_sse.h"
-
-#define SHIFT 1
-#include "ops_sse.h"
-
 #define SHIFT 0
 #include "helper_template.h"