From patchwork Sat Jun 9 16:19:02 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Blue Swirl X-Patchwork-Id: 163955 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 10C7CB6FCA for ; Sun, 10 Jun 2012 03:52:03 +1000 (EST) Received: from localhost ([::1]:57925 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1SdOPQ-0005iA-0o for incoming@patchwork.ozlabs.org; Sat, 09 Jun 2012 12:21:04 -0400 Received: from eggs.gnu.org ([208.118.235.92]:41364) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1SdOO8-0003Eo-Of for qemu-devel@nongnu.org; Sat, 09 Jun 2012 12:19:51 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1SdOO0-0003MF-Ji for qemu-devel@nongnu.org; Sat, 09 Jun 2012 12:19:44 -0400 Received: from mail-ee0-f45.google.com ([74.125.83.45]:61400) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1SdONz-0003Kc-VS for qemu-devel@nongnu.org; Sat, 09 Jun 2012 12:19:36 -0400 Received: by mail-ee0-f45.google.com with SMTP id d41so1793685eek.4 for ; Sat, 09 Jun 2012 09:19:35 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=from:to:subject:date:message-id:x-mailer:in-reply-to:references :in-reply-to:references; bh=37LmVhr6ZRem9ORTavo6SocJW1izdMBsZ35kU+GgO3M=; b=sDL+5YWm/zMv5L1qKYqqYvk/uZIl583brbo8rqW0b1oNuXWMHImOKWQDp79O7l+VNr 7uyl+26UszA5WO878tMPzGoFRnnM7zW+jB5rqFgpKpI1SRHSTxNlSLMo9jnKKj0YXGAN pP9B266XL/QiLKQ5F1REHdINbgq8sYAsfO1hHAhsidNPNq3Rv9EwU6eYeZDxSHU/dX6G JV2ISHbJ0j8U7eJbmVZ9vZ1mz26oHgSRz8TfOI5rSlcHNi+J9VcZY4HKqerpjkZjsnnI RHp5krV4HWTDuV07GTGqPH+eF43bFBhasWY7XrUMR5hbglYlGXazvnRTvBUbKf7PrIK0 rETA== Received: by 10.14.39.77 with SMTP id c53mr282162eeb.51.1339258774821; Sat, 09 Jun 2012 09:19:34 -0700 (PDT) Received: from localhost.localdomain (blueswirl.broker.freenet6.net. [2001:5c0:1400:b::d5a3]) by mx.google.com with ESMTPS id h53sm33625570eea.1.2012.06.09.09.19.32 (version=TLSv1/SSLv3 cipher=OTHER); Sat, 09 Jun 2012 09:19:33 -0700 (PDT) From: Blue Swirl To: qemu-devel@nongnu.org Date: Sat, 9 Jun 2012 16:19:02 +0000 Message-Id: X-Mailer: git-send-email 1.7.2.5 In-Reply-To: References: In-Reply-To: References: X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 74.125.83.45 Subject: [Qemu-devel] [PATCH 06/25] x86: split off FPU helpers X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Move FPU and MMX/SSE helpers to fpu_helpers.c. Signed-off-by: Blue Swirl --- target-i386/Makefile.objs | 3 +- target-i386/fpu_helper.c | 1304 +++++++++++++++++++++++++++++++++++ target-i386/op_helper.c | 1671 ++++++--------------------------------------- 3 files changed, 1503 insertions(+), 1475 deletions(-) create mode 100644 target-i386/fpu_helper.c diff --git a/target-i386/Makefile.objs b/target-i386/Makefile.objs index c0feffe..7c15de4 100644 --- a/target-i386/Makefile.objs +++ b/target-i386/Makefile.objs @@ -1,8 +1,9 @@ obj-y += translate.o op_helper.o helper.o cpu.o -obj-y += excp_helper.o +obj-y += excp_helper.o fpu_helper.o obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o obj-$(CONFIG_KVM) += kvm.o hyperv.o obj-$(CONFIG_LINUX_USER) += ioport-user.o obj-$(CONFIG_BSD_USER) += ioport-user.o $(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS) +$(obj)/fpu_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS) diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c new file mode 100644 index 0000000..6065c2e --- /dev/null +++ b/target-i386/fpu_helper.c @@ -0,0 +1,1304 @@ +/* + * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include +#include "cpu.h" +#include "dyngen-exec.h" +#include "helper.h" + +#if !defined(CONFIG_USER_ONLY) +#include "softmmu_exec.h" +#endif /* !defined(CONFIG_USER_ONLY) */ + +#define FPU_RC_MASK 0xc00 +#define FPU_RC_NEAR 0x000 +#define FPU_RC_DOWN 0x400 +#define FPU_RC_UP 0x800 +#define FPU_RC_CHOP 0xc00 + +#define MAXTAN 9223372036854775808.0 + +/* the following deal with x86 long double-precision numbers */ +#define MAXEXPD 0x7fff +#define EXPBIAS 16383 +#define EXPD(fp) (fp.l.upper & 0x7fff) +#define SIGND(fp) ((fp.l.upper) & 0x8000) +#define MANTD(fp) (fp.l.lower) +#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS + +#define FPUS_IE (1 << 0) +#define FPUS_DE (1 << 1) +#define FPUS_ZE (1 << 2) +#define FPUS_OE (1 << 3) +#define FPUS_UE (1 << 4) +#define FPUS_PE (1 << 5) +#define FPUS_SF (1 << 6) +#define FPUS_SE (1 << 7) +#define FPUS_B (1 << 15) + +#define FPUC_EM 0x3f + +#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) +#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) +#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) + +static inline void fpush(void) +{ + env->fpstt = (env->fpstt - 1) & 7; + env->fptags[env->fpstt] = 0; /* validate stack entry */ +} + +static inline void fpop(void) +{ + env->fptags[env->fpstt] = 1; /* invalidate stack entry */ + env->fpstt = (env->fpstt + 1) & 7; +} + +static inline floatx80 helper_fldt(target_ulong ptr) +{ + CPU_LDoubleU temp; + + temp.l.lower = ldq(ptr); + temp.l.upper = lduw(ptr + 8); + return temp.d; +} + +static inline void helper_fstt(floatx80 f, target_ulong ptr) +{ + CPU_LDoubleU temp; + + temp.d = f; + stq(ptr, temp.l.lower); + stw(ptr + 8, temp.l.upper); +} + +/* x87 FPU helpers */ + +static inline double floatx80_to_double(floatx80 a) +{ + union { + float64 f64; + double d; + } u; + + u.f64 = floatx80_to_float64(a, &env->fp_status); + return u.d; +} + +static inline floatx80 double_to_floatx80(double a) +{ + union { + float64 f64; + double d; + } u; + + u.d = a; + return float64_to_floatx80(u.f64, &env->fp_status); +} + +static void fpu_set_exception(int mask) +{ + env->fpus |= mask; + if (env->fpus & (~env->fpuc & FPUC_EM)) { + env->fpus |= FPUS_SE | FPUS_B; + } +} + +static inline floatx80 helper_fdiv(floatx80 a, floatx80 b) +{ + if (floatx80_is_zero(b)) { + fpu_set_exception(FPUS_ZE); + } + return floatx80_div(a, b, &env->fp_status); +} + +static void fpu_raise_exception(void) +{ + if (env->cr[0] & CR0_NE_MASK) { + raise_exception(env, EXCP10_COPR); + } +#if !defined(CONFIG_USER_ONLY) + else { + cpu_set_ferr(env); + } +#endif +} + +void helper_flds_FT0(uint32_t val) +{ + union { + float32 f; + uint32_t i; + } u; + + u.i = val; + FT0 = float32_to_floatx80(u.f, &env->fp_status); +} + +void helper_fldl_FT0(uint64_t val) +{ + union { + float64 f; + uint64_t i; + } u; + + u.i = val; + FT0 = float64_to_floatx80(u.f, &env->fp_status); +} + +void helper_fildl_FT0(int32_t val) +{ + FT0 = int32_to_floatx80(val, &env->fp_status); +} + +void helper_flds_ST0(uint32_t val) +{ + int new_fpstt; + union { + float32 f; + uint32_t i; + } u; + + new_fpstt = (env->fpstt - 1) & 7; + u.i = val; + env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fldl_ST0(uint64_t val) +{ + int new_fpstt; + union { + float64 f; + uint64_t i; + } u; + + new_fpstt = (env->fpstt - 1) & 7; + u.i = val; + env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fildl_ST0(int32_t val) +{ + int new_fpstt; + + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fildll_ST0(int64_t val) +{ + int new_fpstt; + + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +uint32_t helper_fsts_ST0(void) +{ + union { + float32 f; + uint32_t i; + } u; + + u.f = floatx80_to_float32(ST0, &env->fp_status); + return u.i; +} + +uint64_t helper_fstl_ST0(void) +{ + union { + float64 f; + uint64_t i; + } u; + + u.f = floatx80_to_float64(ST0, &env->fp_status); + return u.i; +} + +int32_t helper_fist_ST0(void) +{ + int32_t val; + + val = floatx80_to_int32(ST0, &env->fp_status); + if (val != (int16_t)val) { + val = -32768; + } + return val; +} + +int32_t helper_fistl_ST0(void) +{ + int32_t val; + + val = floatx80_to_int32(ST0, &env->fp_status); + return val; +} + +int64_t helper_fistll_ST0(void) +{ + int64_t val; + + val = floatx80_to_int64(ST0, &env->fp_status); + return val; +} + +int32_t helper_fistt_ST0(void) +{ + int32_t val; + + val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); + if (val != (int16_t)val) { + val = -32768; + } + return val; +} + +int32_t helper_fisttl_ST0(void) +{ + int32_t val; + + val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); + return val; +} + +int64_t helper_fisttll_ST0(void) +{ + int64_t val; + + val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); + return val; +} + +void helper_fldt_ST0(target_ulong ptr) +{ + int new_fpstt; + + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = helper_fldt(ptr); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fstt_ST0(target_ulong ptr) +{ + helper_fstt(ST0, ptr); +} + +void helper_fpush(void) +{ + fpush(); +} + +void helper_fpop(void) +{ + fpop(); +} + +void helper_fdecstp(void) +{ + env->fpstt = (env->fpstt - 1) & 7; + env->fpus &= ~0x4700; +} + +void helper_fincstp(void) +{ + env->fpstt = (env->fpstt + 1) & 7; + env->fpus &= ~0x4700; +} + +/* FPU move */ + +void helper_ffree_STN(int st_index) +{ + env->fptags[(env->fpstt + st_index) & 7] = 1; +} + +void helper_fmov_ST0_FT0(void) +{ + ST0 = FT0; +} + +void helper_fmov_FT0_STN(int st_index) +{ + FT0 = ST(st_index); +} + +void helper_fmov_ST0_STN(int st_index) +{ + ST0 = ST(st_index); +} + +void helper_fmov_STN_ST0(int st_index) +{ + ST(st_index) = ST0; +} + +void helper_fxchg_ST0_STN(int st_index) +{ + floatx80 tmp; + + tmp = ST(st_index); + ST(st_index) = ST0; + ST0 = tmp; +} + +/* FPU operations */ + +static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; + +void helper_fcom_ST0_FT0(void) +{ + int ret; + + ret = floatx80_compare(ST0, FT0, &env->fp_status); + env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; +} + +void helper_fucom_ST0_FT0(void) +{ + int ret; + + ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); + env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; +} + +static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; + +void helper_fcomi_ST0_FT0(void) +{ + int eflags; + int ret; + + ret = floatx80_compare(ST0, FT0, &env->fp_status); + eflags = helper_cc_compute_all(CC_OP); + eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; + CC_SRC = eflags; +} + +void helper_fucomi_ST0_FT0(void) +{ + int eflags; + int ret; + + ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); + eflags = helper_cc_compute_all(CC_OP); + eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; + CC_SRC = eflags; +} + +void helper_fadd_ST0_FT0(void) +{ + ST0 = floatx80_add(ST0, FT0, &env->fp_status); +} + +void helper_fmul_ST0_FT0(void) +{ + ST0 = floatx80_mul(ST0, FT0, &env->fp_status); +} + +void helper_fsub_ST0_FT0(void) +{ + ST0 = floatx80_sub(ST0, FT0, &env->fp_status); +} + +void helper_fsubr_ST0_FT0(void) +{ + ST0 = floatx80_sub(FT0, ST0, &env->fp_status); +} + +void helper_fdiv_ST0_FT0(void) +{ + ST0 = helper_fdiv(ST0, FT0); +} + +void helper_fdivr_ST0_FT0(void) +{ + ST0 = helper_fdiv(FT0, ST0); +} + +/* fp operations between STN and ST0 */ + +void helper_fadd_STN_ST0(int st_index) +{ + ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); +} + +void helper_fmul_STN_ST0(int st_index) +{ + ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); +} + +void helper_fsub_STN_ST0(int st_index) +{ + ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); +} + +void helper_fsubr_STN_ST0(int st_index) +{ + ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); +} + +void helper_fdiv_STN_ST0(int st_index) +{ + floatx80 *p; + + p = &ST(st_index); + *p = helper_fdiv(*p, ST0); +} + +void helper_fdivr_STN_ST0(int st_index) +{ + floatx80 *p; + + p = &ST(st_index); + *p = helper_fdiv(ST0, *p); +} + +/* misc FPU operations */ +void helper_fchs_ST0(void) +{ + ST0 = floatx80_chs(ST0); +} + +void helper_fabs_ST0(void) +{ + ST0 = floatx80_abs(ST0); +} + +void helper_fld1_ST0(void) +{ + ST0 = floatx80_one; +} + +void helper_fldl2t_ST0(void) +{ + ST0 = floatx80_l2t; +} + +void helper_fldl2e_ST0(void) +{ + ST0 = floatx80_l2e; +} + +void helper_fldpi_ST0(void) +{ + ST0 = floatx80_pi; +} + +void helper_fldlg2_ST0(void) +{ + ST0 = floatx80_lg2; +} + +void helper_fldln2_ST0(void) +{ + ST0 = floatx80_ln2; +} + +void helper_fldz_ST0(void) +{ + ST0 = floatx80_zero; +} + +void helper_fldz_FT0(void) +{ + FT0 = floatx80_zero; +} + +uint32_t helper_fnstsw(void) +{ + return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; +} + +uint32_t helper_fnstcw(void) +{ + return env->fpuc; +} + +static void update_fp_status(void) +{ + int rnd_type; + + /* set rounding mode */ + switch (env->fpuc & FPU_RC_MASK) { + default: + case FPU_RC_NEAR: + rnd_type = float_round_nearest_even; + break; + case FPU_RC_DOWN: + rnd_type = float_round_down; + break; + case FPU_RC_UP: + rnd_type = float_round_up; + break; + case FPU_RC_CHOP: + rnd_type = float_round_to_zero; + break; + } + set_float_rounding_mode(rnd_type, &env->fp_status); + switch ((env->fpuc >> 8) & 3) { + case 0: + rnd_type = 32; + break; + case 2: + rnd_type = 64; + break; + case 3: + default: + rnd_type = 80; + break; + } + set_floatx80_rounding_precision(rnd_type, &env->fp_status); +} + +void helper_fldcw(uint32_t val) +{ + env->fpuc = val; + update_fp_status(); +} + +void helper_fclex(void) +{ + env->fpus &= 0x7f00; +} + +void helper_fwait(void) +{ + if (env->fpus & FPUS_SE) { + fpu_raise_exception(); + } +} + +void helper_fninit(void) +{ + env->fpus = 0; + env->fpstt = 0; + env->fpuc = 0x37f; + env->fptags[0] = 1; + env->fptags[1] = 1; + env->fptags[2] = 1; + env->fptags[3] = 1; + env->fptags[4] = 1; + env->fptags[5] = 1; + env->fptags[6] = 1; + env->fptags[7] = 1; +} + +/* BCD ops */ + +void helper_fbld_ST0(target_ulong ptr) +{ + floatx80 tmp; + uint64_t val; + unsigned int v; + int i; + + val = 0; + for (i = 8; i >= 0; i--) { + v = ldub(ptr + i); + val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); + } + tmp = int64_to_floatx80(val, &env->fp_status); + if (ldub(ptr + 9) & 0x80) { + floatx80_chs(tmp); + } + fpush(); + ST0 = tmp; +} + +void helper_fbst_ST0(target_ulong ptr) +{ + int v; + target_ulong mem_ref, mem_end; + int64_t val; + + val = floatx80_to_int64(ST0, &env->fp_status); + mem_ref = ptr; + mem_end = mem_ref + 9; + if (val < 0) { + stb(mem_end, 0x80); + val = -val; + } else { + stb(mem_end, 0x00); + } + while (mem_ref < mem_end) { + if (val == 0) { + break; + } + v = val % 100; + val = val / 100; + v = ((v / 10) << 4) | (v % 10); + stb(mem_ref++, v); + } + while (mem_ref < mem_end) { + stb(mem_ref++, 0); + } +} + +void helper_f2xm1(void) +{ + double val = floatx80_to_double(ST0); + + val = pow(2.0, val) - 1.0; + ST0 = double_to_floatx80(val); +} + +void helper_fyl2x(void) +{ + double fptemp = floatx80_to_double(ST0); + + if (fptemp > 0.0) { + fptemp = log(fptemp) / log(2.0); /* log2(ST) */ + fptemp *= floatx80_to_double(ST1); + ST1 = double_to_floatx80(fptemp); + fpop(); + } else { + env->fpus &= ~0x4700; + env->fpus |= 0x400; + } +} + +void helper_fptan(void) +{ + double fptemp = floatx80_to_double(ST0); + + if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + fptemp = tan(fptemp); + ST0 = double_to_floatx80(fptemp); + fpush(); + ST0 = floatx80_one; + env->fpus &= ~0x400; /* C2 <-- 0 */ + /* the above code is for |arg| < 2**52 only */ + } +} + +void helper_fpatan(void) +{ + double fptemp, fpsrcop; + + fpsrcop = floatx80_to_double(ST1); + fptemp = floatx80_to_double(ST0); + ST1 = double_to_floatx80(atan2(fpsrcop, fptemp)); + fpop(); +} + +void helper_fxtract(void) +{ + CPU_LDoubleU temp; + + temp.d = ST0; + + if (floatx80_is_zero(ST0)) { + /* Easy way to generate -inf and raising division by 0 exception */ + ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, + &env->fp_status); + fpush(); + ST0 = temp.d; + } else { + int expdif; + + expdif = EXPD(temp) - EXPBIAS; + /* DP exponent bias */ + ST0 = int32_to_floatx80(expdif, &env->fp_status); + fpush(); + BIASEXPONENT(temp); + ST0 = temp.d; + } +} + +void helper_fprem1(void) +{ + double st0, st1, dblq, fpsrcop, fptemp; + CPU_LDoubleU fpsrcop1, fptemp1; + int expdif; + signed long long int q; + + st0 = floatx80_to_double(ST0); + st1 = floatx80_to_double(ST1); + + if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { + ST0 = double_to_floatx80(0.0 / 0.0); /* NaN */ + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + return; + } + + fpsrcop = st0; + fptemp = st1; + fpsrcop1.d = ST0; + fptemp1.d = ST1; + expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + + if (expdif < 0) { + /* optimisation? taken from the AMD docs */ + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + /* ST0 is unchanged */ + return; + } + + if (expdif < 53) { + dblq = fpsrcop / fptemp; + /* round dblq towards nearest integer */ + dblq = rint(dblq); + st0 = fpsrcop - fptemp * dblq; + + /* convert dblq to q by truncating towards zero */ + if (dblq < 0.0) { + q = (signed long long int)(-dblq); + } else { + q = (signed long long int)dblq; + } + + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + /* (C0,C3,C1) <-- (q2,q1,q0) */ + env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ + env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ + env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ + } else { + env->fpus |= 0x400; /* C2 <-- 1 */ + fptemp = pow(2.0, expdif - 50); + fpsrcop = (st0 / st1) / fptemp; + /* fpsrcop = integer obtained by chopping */ + fpsrcop = (fpsrcop < 0.0) ? + -(floor(fabs(fpsrcop))) : floor(fpsrcop); + st0 -= (st1 * fpsrcop * fptemp); + } + ST0 = double_to_floatx80(st0); +} + +void helper_fprem(void) +{ + double st0, st1, dblq, fpsrcop, fptemp; + CPU_LDoubleU fpsrcop1, fptemp1; + int expdif; + signed long long int q; + + st0 = floatx80_to_double(ST0); + st1 = floatx80_to_double(ST1); + + if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { + ST0 = double_to_floatx80(0.0 / 0.0); /* NaN */ + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + return; + } + + fpsrcop = st0; + fptemp = st1; + fpsrcop1.d = ST0; + fptemp1.d = ST1; + expdif = EXPD(fpsrcop1) - EXPD(fptemp1); + + if (expdif < 0) { + /* optimisation? taken from the AMD docs */ + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + /* ST0 is unchanged */ + return; + } + + if (expdif < 53) { + dblq = fpsrcop / fptemp; /* ST0 / ST1 */ + /* round dblq towards zero */ + dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq); + st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */ + + /* convert dblq to q by truncating towards zero */ + if (dblq < 0.0) { + q = (signed long long int)(-dblq); + } else { + q = (signed long long int)dblq; + } + + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + /* (C0,C3,C1) <-- (q2,q1,q0) */ + env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ + env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ + env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ + } else { + int N = 32 + (expdif % 32); /* as per AMD docs */ + + env->fpus |= 0x400; /* C2 <-- 1 */ + fptemp = pow(2.0, (double)(expdif - N)); + fpsrcop = (st0 / st1) / fptemp; + /* fpsrcop = integer obtained by chopping */ + fpsrcop = (fpsrcop < 0.0) ? + -(floor(fabs(fpsrcop))) : floor(fpsrcop); + st0 -= (st1 * fpsrcop * fptemp); + } + ST0 = double_to_floatx80(st0); +} + +void helper_fyl2xp1(void) +{ + double fptemp = floatx80_to_double(ST0); + + if ((fptemp + 1.0) > 0.0) { + fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */ + fptemp *= floatx80_to_double(ST1); + ST1 = double_to_floatx80(fptemp); + fpop(); + } else { + env->fpus &= ~0x4700; + env->fpus |= 0x400; + } +} + +void helper_fsqrt(void) +{ + if (floatx80_is_neg(ST0)) { + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + env->fpus |= 0x400; + } + ST0 = floatx80_sqrt(ST0, &env->fp_status); +} + +void helper_fsincos(void) +{ + double fptemp = floatx80_to_double(ST0); + + if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = double_to_floatx80(sin(fptemp)); + fpush(); + ST0 = double_to_floatx80(cos(fptemp)); + env->fpus &= ~0x400; /* C2 <-- 0 */ + /* the above code is for |arg| < 2**63 only */ + } +} + +void helper_frndint(void) +{ + ST0 = floatx80_round_to_int(ST0, &env->fp_status); +} + +void helper_fscale(void) +{ + if (floatx80_is_any_nan(ST1)) { + ST0 = ST1; + } else { + int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); + ST0 = floatx80_scalbn(ST0, n, &env->fp_status); + } +} + +void helper_fsin(void) +{ + double fptemp = floatx80_to_double(ST0); + + if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = double_to_floatx80(sin(fptemp)); + env->fpus &= ~0x400; /* C2 <-- 0 */ + /* the above code is for |arg| < 2**53 only */ + } +} + +void helper_fcos(void) +{ + double fptemp = floatx80_to_double(ST0); + + if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = double_to_floatx80(cos(fptemp)); + env->fpus &= ~0x400; /* C2 <-- 0 */ + /* the above code is for |arg| < 2**63 only */ + } +} + +void helper_fxam_ST0(void) +{ + CPU_LDoubleU temp; + int expdif; + + temp.d = ST0; + + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + if (SIGND(temp)) { + env->fpus |= 0x200; /* C1 <-- 1 */ + } + + /* XXX: test fptags too */ + expdif = EXPD(temp); + if (expdif == MAXEXPD) { + if (MANTD(temp) == 0x8000000000000000ULL) { + env->fpus |= 0x500; /* Infinity */ + } else { + env->fpus |= 0x100; /* NaN */ + } + } else if (expdif == 0) { + if (MANTD(temp) == 0) { + env->fpus |= 0x4000; /* Zero */ + } else { + env->fpus |= 0x4400; /* Denormal */ + } + } else { + env->fpus |= 0x400; + } +} + +void helper_fstenv(target_ulong ptr, int data32) +{ + int fpus, fptag, exp, i; + uint64_t mant; + CPU_LDoubleU tmp; + + fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + fptag = 0; + for (i = 7; i >= 0; i--) { + fptag <<= 2; + if (env->fptags[i]) { + fptag |= 3; + } else { + tmp.d = env->fpregs[i].d; + exp = EXPD(tmp); + mant = MANTD(tmp); + if (exp == 0 && mant == 0) { + /* zero */ + fptag |= 1; + } else if (exp == 0 || exp == MAXEXPD + || (mant & (1LL << 63)) == 0) { + /* NaNs, infinity, denormal */ + fptag |= 2; + } + } + } + if (data32) { + /* 32 bit */ + stl(ptr, env->fpuc); + stl(ptr + 4, fpus); + stl(ptr + 8, fptag); + stl(ptr + 12, 0); /* fpip */ + stl(ptr + 16, 0); /* fpcs */ + stl(ptr + 20, 0); /* fpoo */ + stl(ptr + 24, 0); /* fpos */ + } else { + /* 16 bit */ + stw(ptr, env->fpuc); + stw(ptr + 2, fpus); + stw(ptr + 4, fptag); + stw(ptr + 6, 0); + stw(ptr + 8, 0); + stw(ptr + 10, 0); + stw(ptr + 12, 0); + } +} + +void helper_fldenv(target_ulong ptr, int data32) +{ + int i, fpus, fptag; + + if (data32) { + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 4); + fptag = lduw(ptr + 8); + } else { + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 2); + fptag = lduw(ptr + 4); + } + env->fpstt = (fpus >> 11) & 7; + env->fpus = fpus & ~0x3800; + for (i = 0; i < 8; i++) { + env->fptags[i] = ((fptag & 3) == 3); + fptag >>= 2; + } +} + +void helper_fsave(target_ulong ptr, int data32) +{ + floatx80 tmp; + int i; + + helper_fstenv(ptr, data32); + + ptr += (14 << data32); + for (i = 0; i < 8; i++) { + tmp = ST(i); + helper_fstt(tmp, ptr); + ptr += 10; + } + + /* fninit */ + env->fpus = 0; + env->fpstt = 0; + env->fpuc = 0x37f; + env->fptags[0] = 1; + env->fptags[1] = 1; + env->fptags[2] = 1; + env->fptags[3] = 1; + env->fptags[4] = 1; + env->fptags[5] = 1; + env->fptags[6] = 1; + env->fptags[7] = 1; +} + +void helper_frstor(target_ulong ptr, int data32) +{ + floatx80 tmp; + int i; + + helper_fldenv(ptr, data32); + ptr += (14 << data32); + + for (i = 0; i < 8; i++) { + tmp = helper_fldt(ptr); + ST(i) = tmp; + ptr += 10; + } +} + +#if defined(CONFIG_USER_ONLY) +void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32) +{ + CPUX86State *saved_env; + + saved_env = env; + env = s; + + helper_fsave(ptr, data32); + + env = saved_env; +} + +void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32) +{ + CPUX86State *saved_env; + + saved_env = env; + env = s; + + helper_frstor(ptr, data32); + + env = saved_env; +} +#endif + +void helper_fxsave(target_ulong ptr, int data64) +{ + int fpus, fptag, i, nb_xmm_regs; + floatx80 tmp; + target_ulong addr; + + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception(env, EXCP0D_GPF); + } + + fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + fptag = 0; + for (i = 0; i < 8; i++) { + fptag |= (env->fptags[i] << i); + } + stw(ptr, env->fpuc); + stw(ptr + 2, fpus); + stw(ptr + 4, fptag ^ 0xff); +#ifdef TARGET_X86_64 + if (data64) { + stq(ptr + 0x08, 0); /* rip */ + stq(ptr + 0x10, 0); /* rdp */ + } else +#endif + { + stl(ptr + 0x08, 0); /* eip */ + stl(ptr + 0x0c, 0); /* sel */ + stl(ptr + 0x10, 0); /* dp */ + stl(ptr + 0x14, 0); /* sel */ + } + + addr = ptr + 0x20; + for (i = 0; i < 8; i++) { + tmp = ST(i); + helper_fstt(tmp, addr); + addr += 16; + } + + if (env->cr[4] & CR4_OSFXSR_MASK) { + /* XXX: finish it */ + stl(ptr + 0x18, env->mxcsr); /* mxcsr */ + stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */ + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs = 16; + } else { + nb_xmm_regs = 8; + } + addr = ptr + 0xa0; + /* Fast FXSAVE leaves out the XMM registers */ + if (!(env->efer & MSR_EFER_FFXSR) + || (env->hflags & HF_CPL_MASK) + || !(env->hflags & HF_LMA_MASK)) { + for (i = 0; i < nb_xmm_regs; i++) { + stq(addr, env->xmm_regs[i].XMM_Q(0)); + stq(addr + 8, env->xmm_regs[i].XMM_Q(1)); + addr += 16; + } + } + } +} + +void helper_fxrstor(target_ulong ptr, int data64) +{ + int i, fpus, fptag, nb_xmm_regs; + floatx80 tmp; + target_ulong addr; + + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception(env, EXCP0D_GPF); + } + + env->fpuc = lduw(ptr); + fpus = lduw(ptr + 2); + fptag = lduw(ptr + 4); + env->fpstt = (fpus >> 11) & 7; + env->fpus = fpus & ~0x3800; + fptag ^= 0xff; + for (i = 0; i < 8; i++) { + env->fptags[i] = ((fptag >> i) & 1); + } + + addr = ptr + 0x20; + for (i = 0; i < 8; i++) { + tmp = helper_fldt(addr); + ST(i) = tmp; + addr += 16; + } + + if (env->cr[4] & CR4_OSFXSR_MASK) { + /* XXX: finish it */ + env->mxcsr = ldl(ptr + 0x18); + /* ldl(ptr + 0x1c); */ + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs = 16; + } else { + nb_xmm_regs = 8; + } + addr = ptr + 0xa0; + /* Fast FXRESTORE leaves out the XMM registers */ + if (!(env->efer & MSR_EFER_FFXSR) + || (env->hflags & HF_CPL_MASK) + || !(env->hflags & HF_LMA_MASK)) { + for (i = 0; i < nb_xmm_regs; i++) { + env->xmm_regs[i].XMM_Q(0) = ldq(addr); + env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8); + addr += 16; + } + } + } +} + +void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f) +{ + CPU_LDoubleU temp; + + temp.d = f; + *pmant = temp.l.lower; + *pexp = temp.l.upper; +} + +floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper) +{ + CPU_LDoubleU temp; + + temp.l.upper = upper; + temp.l.lower = mant; + return temp.d; +} + +/* MMX/SSE */ +/* XXX: optimize by storing fptt and fptags in the static cpu state */ + +#define SSE_DAZ 0x0040 +#define SSE_RC_MASK 0x6000 +#define SSE_RC_NEAR 0x0000 +#define SSE_RC_DOWN 0x2000 +#define SSE_RC_UP 0x4000 +#define SSE_RC_CHOP 0x6000 +#define SSE_FZ 0x8000 + +static void update_sse_status(void) +{ + int rnd_type; + + /* set rounding mode */ + switch (env->mxcsr & SSE_RC_MASK) { + default: + case SSE_RC_NEAR: + rnd_type = float_round_nearest_even; + break; + case SSE_RC_DOWN: + rnd_type = float_round_down; + break; + case SSE_RC_UP: + rnd_type = float_round_up; + break; + case SSE_RC_CHOP: + rnd_type = float_round_to_zero; + break; + } + set_float_rounding_mode(rnd_type, &env->sse_status); + + /* set denormals are zero */ + set_flush_inputs_to_zero((env->mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); + + /* set flush to zero */ + set_flush_to_zero((env->mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status); +} + +void helper_ldmxcsr(uint32_t val) +{ + env->mxcsr = val; + update_sse_status(); +} + +void helper_enter_mmx(void) +{ + env->fpstt = 0; + *(uint32_t *)(env->fptags) = 0; + *(uint32_t *)(env->fptags + 4) = 0; +} + +void helper_emms(void) +{ + /* set to empty state */ + *(uint32_t *)(env->fptags) = 0x01010101; + *(uint32_t *)(env->fptags + 4) = 0x01010101; +} + +/* XXX: suppress */ +void helper_movq(void *d, void *s) +{ + *(uint64_t *)d = *(uint64_t *)s; +} + +#define SHIFT 0 +#include "ops_sse.h" + +#define SHIFT 1 +#include "ops_sse.h" diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c index 8625936..3b77972 100644 --- a/target-i386/op_helper.c +++ b/target-i386/op_helper.c @@ -17,7 +17,6 @@ * License along with this library; if not, see . */ -#include #include "cpu.h" #include "dyngen-exec.h" #include "host-utils.h" @@ -52,64 +51,6 @@ static inline target_long lshift(target_long x, int n) } } -#define FPU_RC_MASK 0xc00 -#define FPU_RC_NEAR 0x000 -#define FPU_RC_DOWN 0x400 -#define FPU_RC_UP 0x800 -#define FPU_RC_CHOP 0xc00 - -#define MAXTAN 9223372036854775808.0 - -/* the following deal with x86 long double-precision numbers */ -#define MAXEXPD 0x7fff -#define EXPBIAS 16383 -#define EXPD(fp) (fp.l.upper & 0x7fff) -#define SIGND(fp) ((fp.l.upper) & 0x8000) -#define MANTD(fp) (fp.l.lower) -#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS - -static inline void fpush(void) -{ - env->fpstt = (env->fpstt - 1) & 7; - env->fptags[env->fpstt] = 0; /* validate stack entry */ -} - -static inline void fpop(void) -{ - env->fptags[env->fpstt] = 1; /* invalidate stack entry */ - env->fpstt = (env->fpstt + 1) & 7; -} - -static inline floatx80 helper_fldt(target_ulong ptr) -{ - CPU_LDoubleU temp; - - temp.l.lower = ldq(ptr); - temp.l.upper = lduw(ptr + 8); - return temp.d; -} - -static inline void helper_fstt(floatx80 f, target_ulong ptr) -{ - CPU_LDoubleU temp; - - temp.d = f; - stq(ptr, temp.l.lower); - stw(ptr + 8, temp.l.upper); -} - -#define FPUS_IE (1 << 0) -#define FPUS_DE (1 << 1) -#define FPUS_ZE (1 << 2) -#define FPUS_OE (1 << 3) -#define FPUS_UE (1 << 4) -#define FPUS_PE (1 << 5) -#define FPUS_SF (1 << 6) -#define FPUS_SE (1 << 7) -#define FPUS_B (1 << 15) - -#define FPUC_EM 0x3f - static inline uint32_t compute_eflags(void) { return env->eflags | helper_cc_compute_all(CC_OP) | (DF & DF_MASK); @@ -189,10 +130,6 @@ static const uint8_t rclb_table[32] = { 6, 7, 8, 0, 1, 2, 3, 4, }; -#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) -#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) -#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) - /* broken thread support */ static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED; @@ -3750,1431 +3687,289 @@ void helper_verw(target_ulong selector1) CC_SRC = eflags | CC_Z; } -/* x87 FPU helpers */ - -static inline double floatx80_to_double(floatx80 a) -{ - union { - float64 f64; - double d; - } u; - - u.f64 = floatx80_to_float64(a, &env->fp_status); - return u.d; -} - -static inline floatx80 double_to_floatx80(double a) -{ - union { - float64 f64; - double d; - } u; - - u.d = a; - return float64_to_floatx80(u.f64, &env->fp_status); -} - -static void fpu_set_exception(int mask) +#if defined(CONFIG_USER_ONLY) +void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector) { - env->fpus |= mask; - if (env->fpus & (~env->fpuc & FPUC_EM)) { - env->fpus |= FPUS_SE | FPUS_B; - } -} + CPUX86State *saved_env; -static inline floatx80 helper_fdiv(floatx80 a, floatx80 b) -{ - if (floatx80_is_zero(b)) { - fpu_set_exception(FPUS_ZE); + saved_env = env; + env = s; + if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) { + selector &= 0xffff; + cpu_x86_load_seg_cache(env, seg_reg, selector, + (selector << 4), 0xffff, 0); + } else { + helper_load_seg(seg_reg, selector); } - return floatx80_div(a, b, &env->fp_status); + env = saved_env; } - -static void fpu_raise_exception(void) -{ - if (env->cr[0] & CR0_NE_MASK) { - raise_exception(env, EXCP10_COPR); - } -#if !defined(CONFIG_USER_ONLY) - else { - cpu_set_ferr(env); - } #endif -} - -void helper_flds_FT0(uint32_t val) -{ - union { - float32 f; - uint32_t i; - } u; - u.i = val; - FT0 = float32_to_floatx80(u.f, &env->fp_status); -} - -void helper_fldl_FT0(uint64_t val) +#ifdef TARGET_X86_64 +static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) { - union { - float64 f; - uint64_t i; - } u; - - u.i = val; - FT0 = float64_to_floatx80(u.f, &env->fp_status); + *plow += a; + /* carry test */ + if (*plow < a) { + (*phigh)++; + } + *phigh += b; } -void helper_fildl_FT0(int32_t val) +static void neg128(uint64_t *plow, uint64_t *phigh) { - FT0 = int32_to_floatx80(val, &env->fp_status); + *plow = ~*plow; + *phigh = ~*phigh; + add128(plow, phigh, 1, 0); } -void helper_flds_ST0(uint32_t val) +/* return TRUE if overflow */ +static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b) { - int new_fpstt; - union { - float32 f; - uint32_t i; - } u; + uint64_t q, r, a1, a0; + int i, qb, ab; - new_fpstt = (env->fpstt - 1) & 7; - u.i = val; - env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); - env->fpstt = new_fpstt; - env->fptags[new_fpstt] = 0; /* validate stack entry */ + a0 = *plow; + a1 = *phigh; + if (a1 == 0) { + q = a0 / b; + r = a0 % b; + *plow = q; + *phigh = r; + } else { + if (a1 >= b) { + return 1; + } + /* XXX: use a better algorithm */ + for (i = 0; i < 64; i++) { + ab = a1 >> 63; + a1 = (a1 << 1) | (a0 >> 63); + if (ab || a1 >= b) { + a1 -= b; + qb = 1; + } else { + qb = 0; + } + a0 = (a0 << 1) | qb; + } +#if defined(DEBUG_MULDIV) + printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64 + ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n", + *phigh, *plow, b, a0, a1); +#endif + *plow = a0; + *phigh = a1; + } + return 0; } -void helper_fldl_ST0(uint64_t val) +/* return TRUE if overflow */ +static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b) { - int new_fpstt; - union { - float64 f; - uint64_t i; - } u; + int sa, sb; - new_fpstt = (env->fpstt - 1) & 7; - u.i = val; - env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); - env->fpstt = new_fpstt; - env->fptags[new_fpstt] = 0; /* validate stack entry */ + sa = ((int64_t)*phigh < 0); + if (sa) { + neg128(plow, phigh); + } + sb = (b < 0); + if (sb) { + b = -b; + } + if (div64(plow, phigh, b) != 0) { + return 1; + } + if (sa ^ sb) { + if (*plow > (1ULL << 63)) { + return 1; + } + *plow = -*plow; + } else { + if (*plow >= (1ULL << 63)) { + return 1; + } + } + if (sa) { + *phigh = -*phigh; + } + return 0; } -void helper_fildl_ST0(int32_t val) +void helper_mulq_EAX_T0(target_ulong t0) { - int new_fpstt; + uint64_t r0, r1; - new_fpstt = (env->fpstt - 1) & 7; - env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); - env->fpstt = new_fpstt; - env->fptags[new_fpstt] = 0; /* validate stack entry */ + mulu64(&r0, &r1, EAX, t0); + EAX = r0; + EDX = r1; + CC_DST = r0; + CC_SRC = r1; } -void helper_fildll_ST0(int64_t val) +void helper_imulq_EAX_T0(target_ulong t0) { - int new_fpstt; + uint64_t r0, r1; - new_fpstt = (env->fpstt - 1) & 7; - env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); - env->fpstt = new_fpstt; - env->fptags[new_fpstt] = 0; /* validate stack entry */ + muls64(&r0, &r1, EAX, t0); + EAX = r0; + EDX = r1; + CC_DST = r0; + CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); } -uint32_t helper_fsts_ST0(void) +target_ulong helper_imulq_T0_T1(target_ulong t0, target_ulong t1) { - union { - float32 f; - uint32_t i; - } u; + uint64_t r0, r1; - u.f = floatx80_to_float32(ST0, &env->fp_status); - return u.i; + muls64(&r0, &r1, t0, t1); + CC_DST = r0; + CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); + return r0; } -uint64_t helper_fstl_ST0(void) +void helper_divq_EAX(target_ulong t0) { - union { - float64 f; - uint64_t i; - } u; + uint64_t r0, r1; - u.f = floatx80_to_float64(ST0, &env->fp_status); - return u.i; + if (t0 == 0) { + raise_exception(env, EXCP00_DIVZ); + } + r0 = EAX; + r1 = EDX; + if (div64(&r0, &r1, t0)) { + raise_exception(env, EXCP00_DIVZ); + } + EAX = r0; + EDX = r1; } -int32_t helper_fist_ST0(void) +void helper_idivq_EAX(target_ulong t0) { - int32_t val; + uint64_t r0, r1; - val = floatx80_to_int32(ST0, &env->fp_status); - if (val != (int16_t)val) { - val = -32768; + if (t0 == 0) { + raise_exception(env, EXCP00_DIVZ); } - return val; + r0 = EAX; + r1 = EDX; + if (idiv64(&r0, &r1, t0)) { + raise_exception(env, EXCP00_DIVZ); + } + EAX = r0; + EDX = r1; } +#endif -int32_t helper_fistl_ST0(void) +static void do_hlt(void) { - int32_t val; - - val = floatx80_to_int32(ST0, &env->fp_status); - return val; + env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */ + env->halted = 1; + env->exception_index = EXCP_HLT; + cpu_loop_exit(env); } -int64_t helper_fistll_ST0(void) +void helper_hlt(int next_eip_addend) { - int64_t val; + helper_svm_check_intercept_param(SVM_EXIT_HLT, 0); + EIP += next_eip_addend; - val = floatx80_to_int64(ST0, &env->fp_status); - return val; + do_hlt(); } -int32_t helper_fistt_ST0(void) +void helper_monitor(target_ulong ptr) { - int32_t val; - - val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); - if (val != (int16_t)val) { - val = -32768; + if ((uint32_t)ECX != 0) { + raise_exception(env, EXCP0D_GPF); } - return val; + /* XXX: store address? */ + helper_svm_check_intercept_param(SVM_EXIT_MONITOR, 0); } -int32_t helper_fisttl_ST0(void) +void helper_mwait(int next_eip_addend) { - int32_t val; + if ((uint32_t)ECX != 0) { + raise_exception(env, EXCP0D_GPF); + } + helper_svm_check_intercept_param(SVM_EXIT_MWAIT, 0); + EIP += next_eip_addend; - val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); - return val; + /* XXX: not complete but not completely erroneous */ + if (env->cpu_index != 0 || env->next_cpu != NULL) { + /* more than one CPU: do not sleep because another CPU may + wake this one */ + } else { + do_hlt(); + } } -int64_t helper_fisttll_ST0(void) +void helper_debug(void) { - int64_t val; - - val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); - return val; + env->exception_index = EXCP_DEBUG; + cpu_loop_exit(env); } -void helper_fldt_ST0(target_ulong ptr) +void helper_reset_rf(void) { - int new_fpstt; - - new_fpstt = (env->fpstt - 1) & 7; - env->fpregs[new_fpstt].d = helper_fldt(ptr); - env->fpstt = new_fpstt; - env->fptags[new_fpstt] = 0; /* validate stack entry */ + env->eflags &= ~RF_MASK; } -void helper_fstt_ST0(target_ulong ptr) +void helper_cli(void) { - helper_fstt(ST0, ptr); + env->eflags &= ~IF_MASK; } -void helper_fpush(void) +void helper_sti(void) { - fpush(); + env->eflags |= IF_MASK; } -void helper_fpop(void) +#if 0 +/* vm86plus instructions */ +void helper_cli_vm(void) { - fpop(); + env->eflags &= ~VIF_MASK; } -void helper_fdecstp(void) +void helper_sti_vm(void) { - env->fpstt = (env->fpstt - 1) & 7; - env->fpus &= ~0x4700; + env->eflags |= VIF_MASK; + if (env->eflags & VIP_MASK) { + raise_exception(env, EXCP0D_GPF); + } } +#endif -void helper_fincstp(void) +void helper_set_inhibit_irq(void) { - env->fpstt = (env->fpstt + 1) & 7; - env->fpus &= ~0x4700; + env->hflags |= HF_INHIBIT_IRQ_MASK; } -/* FPU move */ - -void helper_ffree_STN(int st_index) +void helper_reset_inhibit_irq(void) { - env->fptags[(env->fpstt + st_index) & 7] = 1; + env->hflags &= ~HF_INHIBIT_IRQ_MASK; } -void helper_fmov_ST0_FT0(void) +void helper_boundw(target_ulong a0, int v) { - ST0 = FT0; -} + int low, high; -void helper_fmov_FT0_STN(int st_index) -{ - FT0 = ST(st_index); + low = ldsw(a0); + high = ldsw(a0 + 2); + v = (int16_t)v; + if (v < low || v > high) { + raise_exception(env, EXCP05_BOUND); + } } -void helper_fmov_ST0_STN(int st_index) +void helper_boundl(target_ulong a0, int v) { - ST0 = ST(st_index); -} + int low, high; -void helper_fmov_STN_ST0(int st_index) -{ - ST(st_index) = ST0; -} - -void helper_fxchg_ST0_STN(int st_index) -{ - floatx80 tmp; - - tmp = ST(st_index); - ST(st_index) = ST0; - ST0 = tmp; -} - -/* FPU operations */ - -static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; - -void helper_fcom_ST0_FT0(void) -{ - int ret; - - ret = floatx80_compare(ST0, FT0, &env->fp_status); - env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; -} - -void helper_fucom_ST0_FT0(void) -{ - int ret; - - ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); - env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; -} - -static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; - -void helper_fcomi_ST0_FT0(void) -{ - int eflags; - int ret; - - ret = floatx80_compare(ST0, FT0, &env->fp_status); - eflags = helper_cc_compute_all(CC_OP); - eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; - CC_SRC = eflags; -} - -void helper_fucomi_ST0_FT0(void) -{ - int eflags; - int ret; - - ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); - eflags = helper_cc_compute_all(CC_OP); - eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; - CC_SRC = eflags; -} - -void helper_fadd_ST0_FT0(void) -{ - ST0 = floatx80_add(ST0, FT0, &env->fp_status); -} - -void helper_fmul_ST0_FT0(void) -{ - ST0 = floatx80_mul(ST0, FT0, &env->fp_status); -} - -void helper_fsub_ST0_FT0(void) -{ - ST0 = floatx80_sub(ST0, FT0, &env->fp_status); -} - -void helper_fsubr_ST0_FT0(void) -{ - ST0 = floatx80_sub(FT0, ST0, &env->fp_status); -} - -void helper_fdiv_ST0_FT0(void) -{ - ST0 = helper_fdiv(ST0, FT0); -} - -void helper_fdivr_ST0_FT0(void) -{ - ST0 = helper_fdiv(FT0, ST0); -} - -/* fp operations between STN and ST0 */ - -void helper_fadd_STN_ST0(int st_index) -{ - ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); -} - -void helper_fmul_STN_ST0(int st_index) -{ - ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); -} - -void helper_fsub_STN_ST0(int st_index) -{ - ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); -} - -void helper_fsubr_STN_ST0(int st_index) -{ - ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); -} - -void helper_fdiv_STN_ST0(int st_index) -{ - floatx80 *p; - - p = &ST(st_index); - *p = helper_fdiv(*p, ST0); -} - -void helper_fdivr_STN_ST0(int st_index) -{ - floatx80 *p; - - p = &ST(st_index); - *p = helper_fdiv(ST0, *p); -} - -/* misc FPU operations */ -void helper_fchs_ST0(void) -{ - ST0 = floatx80_chs(ST0); -} - -void helper_fabs_ST0(void) -{ - ST0 = floatx80_abs(ST0); -} - -void helper_fld1_ST0(void) -{ - ST0 = floatx80_one; -} - -void helper_fldl2t_ST0(void) -{ - ST0 = floatx80_l2t; -} - -void helper_fldl2e_ST0(void) -{ - ST0 = floatx80_l2e; -} - -void helper_fldpi_ST0(void) -{ - ST0 = floatx80_pi; -} - -void helper_fldlg2_ST0(void) -{ - ST0 = floatx80_lg2; -} - -void helper_fldln2_ST0(void) -{ - ST0 = floatx80_ln2; -} - -void helper_fldz_ST0(void) -{ - ST0 = floatx80_zero; -} - -void helper_fldz_FT0(void) -{ - FT0 = floatx80_zero; -} - -uint32_t helper_fnstsw(void) -{ - return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; -} - -uint32_t helper_fnstcw(void) -{ - return env->fpuc; -} - -static void update_fp_status(void) -{ - int rnd_type; - - /* set rounding mode */ - switch (env->fpuc & FPU_RC_MASK) { - default: - case FPU_RC_NEAR: - rnd_type = float_round_nearest_even; - break; - case FPU_RC_DOWN: - rnd_type = float_round_down; - break; - case FPU_RC_UP: - rnd_type = float_round_up; - break; - case FPU_RC_CHOP: - rnd_type = float_round_to_zero; - break; - } - set_float_rounding_mode(rnd_type, &env->fp_status); - switch ((env->fpuc >> 8) & 3) { - case 0: - rnd_type = 32; - break; - case 2: - rnd_type = 64; - break; - case 3: - default: - rnd_type = 80; - break; - } - set_floatx80_rounding_precision(rnd_type, &env->fp_status); -} - -void helper_fldcw(uint32_t val) -{ - env->fpuc = val; - update_fp_status(); -} - -void helper_fclex(void) -{ - env->fpus &= 0x7f00; -} - -void helper_fwait(void) -{ - if (env->fpus & FPUS_SE) { - fpu_raise_exception(); - } -} - -void helper_fninit(void) -{ - env->fpus = 0; - env->fpstt = 0; - env->fpuc = 0x37f; - env->fptags[0] = 1; - env->fptags[1] = 1; - env->fptags[2] = 1; - env->fptags[3] = 1; - env->fptags[4] = 1; - env->fptags[5] = 1; - env->fptags[6] = 1; - env->fptags[7] = 1; -} - -/* BCD ops */ - -void helper_fbld_ST0(target_ulong ptr) -{ - floatx80 tmp; - uint64_t val; - unsigned int v; - int i; - - val = 0; - for (i = 8; i >= 0; i--) { - v = ldub(ptr + i); - val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); - } - tmp = int64_to_floatx80(val, &env->fp_status); - if (ldub(ptr + 9) & 0x80) { - floatx80_chs(tmp); - } - fpush(); - ST0 = tmp; -} - -void helper_fbst_ST0(target_ulong ptr) -{ - int v; - target_ulong mem_ref, mem_end; - int64_t val; - - val = floatx80_to_int64(ST0, &env->fp_status); - mem_ref = ptr; - mem_end = mem_ref + 9; - if (val < 0) { - stb(mem_end, 0x80); - val = -val; - } else { - stb(mem_end, 0x00); - } - while (mem_ref < mem_end) { - if (val == 0) { - break; - } - v = val % 100; - val = val / 100; - v = ((v / 10) << 4) | (v % 10); - stb(mem_ref++, v); - } - while (mem_ref < mem_end) { - stb(mem_ref++, 0); - } -} - -void helper_f2xm1(void) -{ - double val = floatx80_to_double(ST0); - - val = pow(2.0, val) - 1.0; - ST0 = double_to_floatx80(val); -} - -void helper_fyl2x(void) -{ - double fptemp = floatx80_to_double(ST0); - - if (fptemp > 0.0) { - fptemp = log(fptemp) / log(2.0); /* log2(ST) */ - fptemp *= floatx80_to_double(ST1); - ST1 = double_to_floatx80(fptemp); - fpop(); - } else { - env->fpus &= ~0x4700; - env->fpus |= 0x400; - } -} - -void helper_fptan(void) -{ - double fptemp = floatx80_to_double(ST0); - - if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { - env->fpus |= 0x400; - } else { - fptemp = tan(fptemp); - ST0 = double_to_floatx80(fptemp); - fpush(); - ST0 = floatx80_one; - env->fpus &= ~0x400; /* C2 <-- 0 */ - /* the above code is for |arg| < 2**52 only */ - } -} - -void helper_fpatan(void) -{ - double fptemp, fpsrcop; - - fpsrcop = floatx80_to_double(ST1); - fptemp = floatx80_to_double(ST0); - ST1 = double_to_floatx80(atan2(fpsrcop, fptemp)); - fpop(); -} - -void helper_fxtract(void) -{ - CPU_LDoubleU temp; - - temp.d = ST0; - - if (floatx80_is_zero(ST0)) { - /* Easy way to generate -inf and raising division by 0 exception */ - ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, - &env->fp_status); - fpush(); - ST0 = temp.d; - } else { - int expdif; - - expdif = EXPD(temp) - EXPBIAS; - /* DP exponent bias */ - ST0 = int32_to_floatx80(expdif, &env->fp_status); - fpush(); - BIASEXPONENT(temp); - ST0 = temp.d; - } -} - -void helper_fprem1(void) -{ - double st0, st1, dblq, fpsrcop, fptemp; - CPU_LDoubleU fpsrcop1, fptemp1; - int expdif; - signed long long int q; - - st0 = floatx80_to_double(ST0); - st1 = floatx80_to_double(ST1); - - if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { - ST0 = double_to_floatx80(0.0 / 0.0); /* NaN */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - return; - } - - fpsrcop = st0; - fptemp = st1; - fpsrcop1.d = ST0; - fptemp1.d = ST1; - expdif = EXPD(fpsrcop1) - EXPD(fptemp1); - - if (expdif < 0) { - /* optimisation? taken from the AMD docs */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* ST0 is unchanged */ - return; - } - - if (expdif < 53) { - dblq = fpsrcop / fptemp; - /* round dblq towards nearest integer */ - dblq = rint(dblq); - st0 = fpsrcop - fptemp * dblq; - - /* convert dblq to q by truncating towards zero */ - if (dblq < 0.0) { - q = (signed long long int)(-dblq); - } else { - q = (signed long long int)dblq; - } - - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* (C0,C3,C1) <-- (q2,q1,q0) */ - env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ - env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ - env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ - } else { - env->fpus |= 0x400; /* C2 <-- 1 */ - fptemp = pow(2.0, expdif - 50); - fpsrcop = (st0 / st1) / fptemp; - /* fpsrcop = integer obtained by chopping */ - fpsrcop = (fpsrcop < 0.0) ? - -(floor(fabs(fpsrcop))) : floor(fpsrcop); - st0 -= (st1 * fpsrcop * fptemp); - } - ST0 = double_to_floatx80(st0); -} - -void helper_fprem(void) -{ - double st0, st1, dblq, fpsrcop, fptemp; - CPU_LDoubleU fpsrcop1, fptemp1; - int expdif; - signed long long int q; - - st0 = floatx80_to_double(ST0); - st1 = floatx80_to_double(ST1); - - if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { - ST0 = double_to_floatx80(0.0 / 0.0); /* NaN */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - return; - } - - fpsrcop = st0; - fptemp = st1; - fpsrcop1.d = ST0; - fptemp1.d = ST1; - expdif = EXPD(fpsrcop1) - EXPD(fptemp1); - - if (expdif < 0) { - /* optimisation? taken from the AMD docs */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* ST0 is unchanged */ - return; - } - - if (expdif < 53) { - dblq = fpsrcop / fptemp; /* ST0 / ST1 */ - /* round dblq towards zero */ - dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq); - st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */ - - /* convert dblq to q by truncating towards zero */ - if (dblq < 0.0) { - q = (signed long long int)(-dblq); - } else { - q = (signed long long int)dblq; - } - - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* (C0,C3,C1) <-- (q2,q1,q0) */ - env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ - env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ - env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ - } else { - int N = 32 + (expdif % 32); /* as per AMD docs */ - - env->fpus |= 0x400; /* C2 <-- 1 */ - fptemp = pow(2.0, (double)(expdif - N)); - fpsrcop = (st0 / st1) / fptemp; - /* fpsrcop = integer obtained by chopping */ - fpsrcop = (fpsrcop < 0.0) ? - -(floor(fabs(fpsrcop))) : floor(fpsrcop); - st0 -= (st1 * fpsrcop * fptemp); - } - ST0 = double_to_floatx80(st0); -} - -void helper_fyl2xp1(void) -{ - double fptemp = floatx80_to_double(ST0); - - if ((fptemp + 1.0) > 0.0) { - fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */ - fptemp *= floatx80_to_double(ST1); - ST1 = double_to_floatx80(fptemp); - fpop(); - } else { - env->fpus &= ~0x4700; - env->fpus |= 0x400; - } -} - -void helper_fsqrt(void) -{ - if (floatx80_is_neg(ST0)) { - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - env->fpus |= 0x400; - } - ST0 = floatx80_sqrt(ST0, &env->fp_status); -} - -void helper_fsincos(void) -{ - double fptemp = floatx80_to_double(ST0); - - if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { - env->fpus |= 0x400; - } else { - ST0 = double_to_floatx80(sin(fptemp)); - fpush(); - ST0 = double_to_floatx80(cos(fptemp)); - env->fpus &= ~0x400; /* C2 <-- 0 */ - /* the above code is for |arg| < 2**63 only */ - } -} - -void helper_frndint(void) -{ - ST0 = floatx80_round_to_int(ST0, &env->fp_status); -} - -void helper_fscale(void) -{ - if (floatx80_is_any_nan(ST1)) { - ST0 = ST1; - } else { - int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); - ST0 = floatx80_scalbn(ST0, n, &env->fp_status); - } -} - -void helper_fsin(void) -{ - double fptemp = floatx80_to_double(ST0); - - if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { - env->fpus |= 0x400; - } else { - ST0 = double_to_floatx80(sin(fptemp)); - env->fpus &= ~0x400; /* C2 <-- 0 */ - /* the above code is for |arg| < 2**53 only */ - } -} - -void helper_fcos(void) -{ - double fptemp = floatx80_to_double(ST0); - - if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { - env->fpus |= 0x400; - } else { - ST0 = double_to_floatx80(cos(fptemp)); - env->fpus &= ~0x400; /* C2 <-- 0 */ - /* the above code is for |arg| < 2**63 only */ - } -} - -void helper_fxam_ST0(void) -{ - CPU_LDoubleU temp; - int expdif; - - temp.d = ST0; - - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - if (SIGND(temp)) { - env->fpus |= 0x200; /* C1 <-- 1 */ - } - - /* XXX: test fptags too */ - expdif = EXPD(temp); - if (expdif == MAXEXPD) { - if (MANTD(temp) == 0x8000000000000000ULL) { - env->fpus |= 0x500; /* Infinity */ - } else { - env->fpus |= 0x100; /* NaN */ - } - } else if (expdif == 0) { - if (MANTD(temp) == 0) { - env->fpus |= 0x4000; /* Zero */ - } else { - env->fpus |= 0x4400; /* Denormal */ - } - } else { - env->fpus |= 0x400; - } -} - -void helper_fstenv(target_ulong ptr, int data32) -{ - int fpus, fptag, exp, i; - uint64_t mant; - CPU_LDoubleU tmp; - - fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; - fptag = 0; - for (i = 7; i >= 0; i--) { - fptag <<= 2; - if (env->fptags[i]) { - fptag |= 3; - } else { - tmp.d = env->fpregs[i].d; - exp = EXPD(tmp); - mant = MANTD(tmp); - if (exp == 0 && mant == 0) { - /* zero */ - fptag |= 1; - } else if (exp == 0 || exp == MAXEXPD - || (mant & (1LL << 63)) == 0) { - /* NaNs, infinity, denormal */ - fptag |= 2; - } - } - } - if (data32) { - /* 32 bit */ - stl(ptr, env->fpuc); - stl(ptr + 4, fpus); - stl(ptr + 8, fptag); - stl(ptr + 12, 0); /* fpip */ - stl(ptr + 16, 0); /* fpcs */ - stl(ptr + 20, 0); /* fpoo */ - stl(ptr + 24, 0); /* fpos */ - } else { - /* 16 bit */ - stw(ptr, env->fpuc); - stw(ptr + 2, fpus); - stw(ptr + 4, fptag); - stw(ptr + 6, 0); - stw(ptr + 8, 0); - stw(ptr + 10, 0); - stw(ptr + 12, 0); - } -} - -void helper_fldenv(target_ulong ptr, int data32) -{ - int i, fpus, fptag; - - if (data32) { - env->fpuc = lduw(ptr); - fpus = lduw(ptr + 4); - fptag = lduw(ptr + 8); - } else { - env->fpuc = lduw(ptr); - fpus = lduw(ptr + 2); - fptag = lduw(ptr + 4); - } - env->fpstt = (fpus >> 11) & 7; - env->fpus = fpus & ~0x3800; - for (i = 0; i < 8; i++) { - env->fptags[i] = ((fptag & 3) == 3); - fptag >>= 2; - } -} - -void helper_fsave(target_ulong ptr, int data32) -{ - floatx80 tmp; - int i; - - helper_fstenv(ptr, data32); - - ptr += (14 << data32); - for (i = 0; i < 8; i++) { - tmp = ST(i); - helper_fstt(tmp, ptr); - ptr += 10; - } - - /* fninit */ - env->fpus = 0; - env->fpstt = 0; - env->fpuc = 0x37f; - env->fptags[0] = 1; - env->fptags[1] = 1; - env->fptags[2] = 1; - env->fptags[3] = 1; - env->fptags[4] = 1; - env->fptags[5] = 1; - env->fptags[6] = 1; - env->fptags[7] = 1; -} - -void helper_frstor(target_ulong ptr, int data32) -{ - floatx80 tmp; - int i; - - helper_fldenv(ptr, data32); - ptr += (14 << data32); - - for (i = 0; i < 8; i++) { - tmp = helper_fldt(ptr); - ST(i) = tmp; - ptr += 10; - } -} - - -#if defined(CONFIG_USER_ONLY) -void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector) -{ - CPUX86State *saved_env; - - saved_env = env; - env = s; - if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) { - selector &= 0xffff; - cpu_x86_load_seg_cache(env, seg_reg, selector, - (selector << 4), 0xffff, 0); - } else { - helper_load_seg(seg_reg, selector); - } - env = saved_env; -} - -void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32) -{ - CPUX86State *saved_env; - - saved_env = env; - env = s; - - helper_fsave(ptr, data32); - - env = saved_env; -} - -void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32) -{ - CPUX86State *saved_env; - - saved_env = env; - env = s; - - helper_frstor(ptr, data32); - - env = saved_env; -} -#endif - -void helper_fxsave(target_ulong ptr, int data64) -{ - int fpus, fptag, i, nb_xmm_regs; - floatx80 tmp; - target_ulong addr; - - /* The operand must be 16 byte aligned */ - if (ptr & 0xf) { - raise_exception(env, EXCP0D_GPF); - } - - fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; - fptag = 0; - for (i = 0; i < 8; i++) { - fptag |= (env->fptags[i] << i); - } - stw(ptr, env->fpuc); - stw(ptr + 2, fpus); - stw(ptr + 4, fptag ^ 0xff); -#ifdef TARGET_X86_64 - if (data64) { - stq(ptr + 0x08, 0); /* rip */ - stq(ptr + 0x10, 0); /* rdp */ - } else -#endif - { - stl(ptr + 0x08, 0); /* eip */ - stl(ptr + 0x0c, 0); /* sel */ - stl(ptr + 0x10, 0); /* dp */ - stl(ptr + 0x14, 0); /* sel */ - } - - addr = ptr + 0x20; - for (i = 0; i < 8; i++) { - tmp = ST(i); - helper_fstt(tmp, addr); - addr += 16; - } - - if (env->cr[4] & CR4_OSFXSR_MASK) { - /* XXX: finish it */ - stl(ptr + 0x18, env->mxcsr); /* mxcsr */ - stl(ptr + 0x1c, 0x0000ffff); /* mxcsr_mask */ - if (env->hflags & HF_CS64_MASK) { - nb_xmm_regs = 16; - } else { - nb_xmm_regs = 8; - } - addr = ptr + 0xa0; - /* Fast FXSAVE leaves out the XMM registers */ - if (!(env->efer & MSR_EFER_FFXSR) - || (env->hflags & HF_CPL_MASK) - || !(env->hflags & HF_LMA_MASK)) { - for (i = 0; i < nb_xmm_regs; i++) { - stq(addr, env->xmm_regs[i].XMM_Q(0)); - stq(addr + 8, env->xmm_regs[i].XMM_Q(1)); - addr += 16; - } - } - } -} - -void helper_fxrstor(target_ulong ptr, int data64) -{ - int i, fpus, fptag, nb_xmm_regs; - floatx80 tmp; - target_ulong addr; - - /* The operand must be 16 byte aligned */ - if (ptr & 0xf) { - raise_exception(env, EXCP0D_GPF); - } - - env->fpuc = lduw(ptr); - fpus = lduw(ptr + 2); - fptag = lduw(ptr + 4); - env->fpstt = (fpus >> 11) & 7; - env->fpus = fpus & ~0x3800; - fptag ^= 0xff; - for (i = 0; i < 8; i++) { - env->fptags[i] = ((fptag >> i) & 1); - } - - addr = ptr + 0x20; - for (i = 0; i < 8; i++) { - tmp = helper_fldt(addr); - ST(i) = tmp; - addr += 16; - } - - if (env->cr[4] & CR4_OSFXSR_MASK) { - /* XXX: finish it */ - env->mxcsr = ldl(ptr + 0x18); - /* ldl(ptr + 0x1c); */ - if (env->hflags & HF_CS64_MASK) { - nb_xmm_regs = 16; - } else { - nb_xmm_regs = 8; - } - addr = ptr + 0xa0; - /* Fast FXRESTORE leaves out the XMM registers */ - if (!(env->efer & MSR_EFER_FFXSR) - || (env->hflags & HF_CPL_MASK) - || !(env->hflags & HF_LMA_MASK)) { - for (i = 0; i < nb_xmm_regs; i++) { - env->xmm_regs[i].XMM_Q(0) = ldq(addr); - env->xmm_regs[i].XMM_Q(1) = ldq(addr + 8); - addr += 16; - } - } - } -} - -void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f) -{ - CPU_LDoubleU temp; - - temp.d = f; - *pmant = temp.l.lower; - *pexp = temp.l.upper; -} - -floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper) -{ - CPU_LDoubleU temp; - - temp.l.upper = upper; - temp.l.lower = mant; - return temp.d; -} - -#ifdef TARGET_X86_64 -static void add128(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) -{ - *plow += a; - /* carry test */ - if (*plow < a) { - (*phigh)++; - } - *phigh += b; -} - -static void neg128(uint64_t *plow, uint64_t *phigh) -{ - *plow = ~*plow; - *phigh = ~*phigh; - add128(plow, phigh, 1, 0); -} - -/* return TRUE if overflow */ -static int div64(uint64_t *plow, uint64_t *phigh, uint64_t b) -{ - uint64_t q, r, a1, a0; - int i, qb, ab; - - a0 = *plow; - a1 = *phigh; - if (a1 == 0) { - q = a0 / b; - r = a0 % b; - *plow = q; - *phigh = r; - } else { - if (a1 >= b) { - return 1; - } - /* XXX: use a better algorithm */ - for (i = 0; i < 64; i++) { - ab = a1 >> 63; - a1 = (a1 << 1) | (a0 >> 63); - if (ab || a1 >= b) { - a1 -= b; - qb = 1; - } else { - qb = 0; - } - a0 = (a0 << 1) | qb; - } -#if defined(DEBUG_MULDIV) - printf("div: 0x%016" PRIx64 "%016" PRIx64 " / 0x%016" PRIx64 - ": q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n", - *phigh, *plow, b, a0, a1); -#endif - *plow = a0; - *phigh = a1; - } - return 0; -} - -/* return TRUE if overflow */ -static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b) -{ - int sa, sb; - - sa = ((int64_t)*phigh < 0); - if (sa) { - neg128(plow, phigh); - } - sb = (b < 0); - if (sb) { - b = -b; - } - if (div64(plow, phigh, b) != 0) { - return 1; - } - if (sa ^ sb) { - if (*plow > (1ULL << 63)) { - return 1; - } - *plow = -*plow; - } else { - if (*plow >= (1ULL << 63)) { - return 1; - } - } - if (sa) { - *phigh = -*phigh; - } - return 0; -} - -void helper_mulq_EAX_T0(target_ulong t0) -{ - uint64_t r0, r1; - - mulu64(&r0, &r1, EAX, t0); - EAX = r0; - EDX = r1; - CC_DST = r0; - CC_SRC = r1; -} - -void helper_imulq_EAX_T0(target_ulong t0) -{ - uint64_t r0, r1; - - muls64(&r0, &r1, EAX, t0); - EAX = r0; - EDX = r1; - CC_DST = r0; - CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); -} - -target_ulong helper_imulq_T0_T1(target_ulong t0, target_ulong t1) -{ - uint64_t r0, r1; - - muls64(&r0, &r1, t0, t1); - CC_DST = r0; - CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); - return r0; -} - -void helper_divq_EAX(target_ulong t0) -{ - uint64_t r0, r1; - - if (t0 == 0) { - raise_exception(env, EXCP00_DIVZ); - } - r0 = EAX; - r1 = EDX; - if (div64(&r0, &r1, t0)) { - raise_exception(env, EXCP00_DIVZ); - } - EAX = r0; - EDX = r1; -} - -void helper_idivq_EAX(target_ulong t0) -{ - uint64_t r0, r1; - - if (t0 == 0) { - raise_exception(env, EXCP00_DIVZ); - } - r0 = EAX; - r1 = EDX; - if (idiv64(&r0, &r1, t0)) { - raise_exception(env, EXCP00_DIVZ); - } - EAX = r0; - EDX = r1; -} -#endif - -static void do_hlt(void) -{ - env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */ - env->halted = 1; - env->exception_index = EXCP_HLT; - cpu_loop_exit(env); -} - -void helper_hlt(int next_eip_addend) -{ - helper_svm_check_intercept_param(SVM_EXIT_HLT, 0); - EIP += next_eip_addend; - - do_hlt(); -} - -void helper_monitor(target_ulong ptr) -{ - if ((uint32_t)ECX != 0) { - raise_exception(env, EXCP0D_GPF); - } - /* XXX: store address? */ - helper_svm_check_intercept_param(SVM_EXIT_MONITOR, 0); -} - -void helper_mwait(int next_eip_addend) -{ - if ((uint32_t)ECX != 0) { - raise_exception(env, EXCP0D_GPF); - } - helper_svm_check_intercept_param(SVM_EXIT_MWAIT, 0); - EIP += next_eip_addend; - - /* XXX: not complete but not completely erroneous */ - if (env->cpu_index != 0 || env->next_cpu != NULL) { - /* more than one CPU: do not sleep because another CPU may - wake this one */ - } else { - do_hlt(); - } -} - -void helper_debug(void) -{ - env->exception_index = EXCP_DEBUG; - cpu_loop_exit(env); -} - -void helper_reset_rf(void) -{ - env->eflags &= ~RF_MASK; -} - -void helper_cli(void) -{ - env->eflags &= ~IF_MASK; -} - -void helper_sti(void) -{ - env->eflags |= IF_MASK; -} - -#if 0 -/* vm86plus instructions */ -void helper_cli_vm(void) -{ - env->eflags &= ~VIF_MASK; -} - -void helper_sti_vm(void) -{ - env->eflags |= VIF_MASK; - if (env->eflags & VIP_MASK) { - raise_exception(env, EXCP0D_GPF); - } -} -#endif - -void helper_set_inhibit_irq(void) -{ - env->hflags |= HF_INHIBIT_IRQ_MASK; -} - -void helper_reset_inhibit_irq(void) -{ - env->hflags &= ~HF_INHIBIT_IRQ_MASK; -} - -void helper_boundw(target_ulong a0, int v) -{ - int low, high; - - low = ldsw(a0); - high = ldsw(a0 + 2); - v = (int16_t)v; - if (v < low || v > high) { - raise_exception(env, EXCP05_BOUND); - } -} - -void helper_boundl(target_ulong a0, int v) -{ - int low, high; - - low = ldl(a0); - high = ldl(a0 + 4); - if (v < low || v > high) { - raise_exception(env, EXCP05_BOUND); - } + low = ldl(a0); + high = ldl(a0 + 4); + if (v < low || v > high) { + raise_exception(env, EXCP05_BOUND); + } } #if !defined(CONFIG_USER_ONLY) @@ -5919,78 +4714,6 @@ void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1) #endif -/* MMX/SSE */ -/* XXX: optimize by storing fptt and fptags in the static cpu state */ - -#define SSE_DAZ 0x0040 -#define SSE_RC_MASK 0x6000 -#define SSE_RC_NEAR 0x0000 -#define SSE_RC_DOWN 0x2000 -#define SSE_RC_UP 0x4000 -#define SSE_RC_CHOP 0x6000 -#define SSE_FZ 0x8000 - -static void update_sse_status(void) -{ - int rnd_type; - - /* set rounding mode */ - switch (env->mxcsr & SSE_RC_MASK) { - default: - case SSE_RC_NEAR: - rnd_type = float_round_nearest_even; - break; - case SSE_RC_DOWN: - rnd_type = float_round_down; - break; - case SSE_RC_UP: - rnd_type = float_round_up; - break; - case SSE_RC_CHOP: - rnd_type = float_round_to_zero; - break; - } - set_float_rounding_mode(rnd_type, &env->sse_status); - - /* set denormals are zero */ - set_flush_inputs_to_zero((env->mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); - - /* set flush to zero */ - set_flush_to_zero((env->mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status); -} - -void helper_ldmxcsr(uint32_t val) -{ - env->mxcsr = val; - update_sse_status(); -} - -void helper_enter_mmx(void) -{ - env->fpstt = 0; - *(uint32_t *)(env->fptags) = 0; - *(uint32_t *)(env->fptags + 4) = 0; -} - -void helper_emms(void) -{ - /* set to empty state */ - *(uint32_t *)(env->fptags) = 0x01010101; - *(uint32_t *)(env->fptags + 4) = 0x01010101; -} - -/* XXX: suppress */ -void helper_movq(void *d, void *s) -{ - *(uint64_t *)d = *(uint64_t *)s; -} - -#define SHIFT 0 -#include "ops_sse.h" - -#define SHIFT 1 -#include "ops_sse.h" - #define SHIFT 0 #include "helper_template.h" #undef SHIFT