Patchwork [6/6] target-alpha: Implement IEEE FP qualifiers.

login
register
mail settings
Submitter Richard Henderson
Date Jan. 4, 2010, 10:27 p.m.
Message ID <f50bc4d93bf4205e888c2e8ae9d00da65dafa148.1262645165.git.rth@twiddle.net>
Download mbox | patch
Permalink /patch/42220/
State New
Headers show

Comments

Richard Henderson - Jan. 4, 2010, 10:27 p.m.
IEEE FP instructions are split up so that the rounding mode
coming from the instruction and exceptions (both masking and
delivery) are handled external to the base FP operation.
FP exceptions are properly raised for non-finite inputs to
instructions that do not indicate software completion.

A shortcut is applied if CONFIG_SOFTFLOAT_INLINE is defined
at the top of translate.c: data is loaded and stored into
FP_STATUS directly instead of using the functional interface
defined by "softfloat.h".

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-alpha/helper.h    |   21 ++-
 target-alpha/op_helper.c |  261 +++++++++++++++++++++--
 target-alpha/translate.c |  521 ++++++++++++++++++++++++++++++++++++++--------
 3 files changed, 688 insertions(+), 115 deletions(-)

Patch

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index bedd3c0..79cf375 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -83,7 +83,6 @@  DEF_HELPER_2(cpyse, i64, i64, i64)
 
 DEF_HELPER_1(cvtts, i64, i64)
 DEF_HELPER_1(cvtst, i64, i64)
-DEF_HELPER_1(cvttq, i64, i64)
 DEF_HELPER_1(cvtqs, i64, i64)
 DEF_HELPER_1(cvtqt, i64, i64)
 DEF_HELPER_1(cvtqf, i64, i64)
@@ -91,9 +90,25 @@  DEF_HELPER_1(cvtgf, i64, i64)
 DEF_HELPER_1(cvtgq, i64, i64)
 DEF_HELPER_1(cvtqg, i64, i64)
 DEF_HELPER_1(cvtlq, i64, i64)
+
+DEF_HELPER_1(cvttq, i64, i64)
+DEF_HELPER_1(cvttq_c, i64, i64)
+DEF_HELPER_1(cvttq_svic, i64, i64)
+
 DEF_HELPER_1(cvtql, i64, i64)
-DEF_HELPER_1(cvtqlv, i64, i64)
-DEF_HELPER_1(cvtqlsv, i64, i64)
+DEF_HELPER_1(cvtql_v, i64, i64)
+DEF_HELPER_1(cvtql_sv, i64, i64)
+
+DEF_HELPER_1(setroundmode, void, i32)
+DEF_HELPER_1(setflushzero, void, i32)
+DEF_HELPER_0(fp_exc_clear, void)
+DEF_HELPER_0(fp_exc_get, i32)
+DEF_HELPER_2(fp_exc_raise, void, i32, i32)
+DEF_HELPER_2(fp_exc_raise_s, void, i32, i32)
+
+DEF_HELPER_1(ieee_input, i64, i64)
+DEF_HELPER_1(ieee_input_cmp, i64, i64)
+DEF_HELPER_1(ieee_input_s, i64, i64)
 
 #if !defined (CONFIG_USER_ONLY)
 DEF_HELPER_0(hw_rei, void)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index a322f12..7b1a869 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -370,6 +370,130 @@  uint64_t helper_unpkbw (uint64_t op1)
 
 /* Floating point helpers */
 
+void helper_setroundmode (uint32_t val)
+{
+    set_float_rounding_mode(val, &FP_STATUS);
+}
+
+void helper_setflushzero (uint32_t val)
+{
+    set_flush_to_zero(val, &FP_STATUS);
+}
+
+void helper_fp_exc_clear (void)
+{
+    set_float_exception_flags(0, &FP_STATUS);
+}
+
+uint32_t helper_fp_exc_get (void)
+{
+    return get_float_exception_flags(&FP_STATUS);
+}
+
+/* Raise exceptions for ieee fp insns without software completion.
+   In that case there are no exceptions that don't trap; the mask
+   doesn't apply.  */
+void helper_fp_exc_raise(uint32_t exc, uint32_t regno)
+{
+    if (exc) {
+        uint32_t hw_exc = 0;
+
+        env->ipr[IPR_EXC_MASK] |= 1ull << regno;
+
+        if (exc & float_flag_invalid) {
+            hw_exc |= EXC_M_INV;
+        }
+        if (exc & float_flag_divbyzero) {
+            hw_exc |= EXC_M_DZE;
+        }
+        if (exc & float_flag_overflow) {
+            hw_exc |= EXC_M_FOV;
+        }
+        if (exc & float_flag_underflow) {
+            hw_exc |= EXC_M_UNF;
+        }
+        if (exc & float_flag_inexact) {
+            hw_exc |= EXC_M_INE;
+        }
+        helper_excp(EXCP_ARITH, hw_exc);
+    }
+}
+
+/* Raise exceptions for ieee fp insns with software completion.  */
+void helper_fp_exc_raise_s(uint32_t exc, uint32_t regno)
+{
+    if (exc) {
+        env->fpcr_exc_status |= exc;
+
+        exc &= ~env->fpcr_exc_mask;
+        if (exc) {
+            helper_fp_exc_raise(exc, regno);
+        }
+    }
+}
+
+/* Input remapping without software completion.  Handle denormal-map-to-zero
+   and trap for all other non-finite numbers.  */
+uint64_t helper_ieee_input(uint64_t val)
+{
+    uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;
+    uint64_t frac = val & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (frac != 0) {
+            /* If DNZ is set flush denormals to zero on input.  */
+            if (env->fpcr_dnz) {
+                val &= 1ull << 63;
+            } else {
+                helper_excp(EXCP_ARITH, EXC_M_UNF);
+            }
+        }
+    } else if (exp == 0x7ff) {
+        /* Infinity or NaN.  */
+        /* ??? I'm not sure these exception bit flags are correct.  I do
+           know that the Linux kernel, at least, doesn't rely on them and
+           just emulates the insn to figure out what exception to use.  */
+        helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV);
+    }
+    return val;
+}
+
+/* Similar, but does not trap for infinities.  Used for comparisons.  */
+uint64_t helper_ieee_input_cmp(uint64_t val)
+{
+    uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;
+    uint64_t frac = val & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (frac != 0) {
+            /* If DNZ is set flush denormals to zero on input.  */
+            if (env->fpcr_dnz) {
+                val &= 1ull << 63;
+            } else {
+                helper_excp(EXCP_ARITH, EXC_M_UNF);
+            }
+        }
+    } else if (exp == 0x7ff && frac) {
+        /* NaN.  */
+        helper_excp(EXCP_ARITH, EXC_M_INV);
+    }
+    return val;
+}
+
+/* Input remapping with software completion enabled.  All we have to do
+   is handle denormal-map-to-zero; all other inputs get exceptions as 
+   needed from the actual operation.  */
+uint64_t helper_ieee_input_s(uint64_t val)
+{
+    if (env->fpcr_dnz) {
+        uint32_t exp = (uint32_t)(val >> 52) & 0x7ff;
+        if (exp == 0) {
+            val &= 1ull << 63;
+        }
+    }
+    return val;
+}
+
 /* F floating (VAX) */
 static inline uint64_t float32_to_f(float32 fa)
 {
@@ -447,6 +571,9 @@  uint64_t helper_memory_to_f (uint32_t a)
     return r;
 }
 
+/* ??? Emulating VAX arithmetic with IEEE arithmetic is wrong.  We should
+   either implement VAX arithmetic properly or just signal invalid opcode.  */
+
 uint64_t helper_addf (uint64_t a, uint64_t b)
 {
     float32 fa, fb, fr;
@@ -931,10 +1058,107 @@  uint64_t helper_cvtqs (uint64_t a)
     return float32_to_s(fr);
 }
 
-uint64_t helper_cvttq (uint64_t a)
+/* Implement float64 to uint64 conversion without saturation -- we must
+   supply the truncated result.  This behaviour is used by the compiler
+   to get unsigned conversion for free with the same instruction. 
+
+   The VI flag is set when overflow or inexact exceptions should be raised.  */
+
+static inline uint64_t helper_cvttq_internal(uint64_t a, int roundmode, int VI)
 {
-    float64 fa = t_to_float64(a);
-    return float64_to_int64_round_to_zero(fa, &FP_STATUS);
+    uint64_t frac, ret = 0;
+    uint32_t exp, sign, exc = 0;
+    int shift;
+
+    sign = (a >> 63);
+    exp = (uint32_t)(a >> 52) & 0x7ff;
+    frac = a & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (unlikely(frac != 0)) {
+            goto do_underflow;
+        }
+    } else if (exp == 0x7ff) {
+        exc = (frac ? float_flag_invalid : VI ? float_flag_overflow : 0);
+    } else {
+        /* Restore implicit bit.  */
+        frac |= 0x10000000000000ull;
+
+        shift = exp - 1023 - 52;
+        if (shift >= 0) {
+            /* In this case the number is so large that we must shift
+               the fraction left.  There is no rounding to do.  */
+            if (shift < 63) {
+                ret = frac << shift;
+                if (VI && (ret >> shift) != frac) {
+                    exc = float_flag_overflow;
+                }
+            }
+        } else {
+            uint64_t round;
+
+            /* In this case the number is smaller than the fraction as
+               represented by the 52 bit number.  Here we must think 
+               about rounding the result.  Handle this by shifting the
+               fractional part of the number into the high bits of ROUND.
+               This will let us efficiently handle round-to-nearest.  */
+            shift = -shift;
+            if (shift < 63) {
+                ret = frac >> shift;
+                round = frac << (64 - shift);
+            } else {
+                /* The exponent is so small we shift out everything.
+                   Leave a sticky bit for proper rounding below.  */
+            do_underflow:
+                round = 1;
+            }
+
+            if (round) {
+                exc = (VI ? float_flag_inexact : 0);
+                switch (roundmode) {
+                case float_round_nearest_even:
+                    if (round == (1ull << 63)) {
+                        /* Fraction is exactly 0.5; round to even.  */
+                        ret += (ret & 1);
+                    } else if (round > (1ull << 63)) {
+                        ret += 1;
+                    }
+                    break;
+                case float_round_to_zero:
+                    break;
+                case float_round_up:
+                    ret += 1 - sign;
+                    break;
+                case float_round_down:
+                    ret += sign;
+                    break;
+                }
+            }
+        }
+        if (sign) {
+            ret = -ret;
+        }
+    }
+    if (unlikely(exc)) {
+        float_raise(exc, &FP_STATUS);
+    }
+
+    return ret;
+}
+
+uint64_t helper_cvttq(uint64_t a)
+{
+    return helper_cvttq_internal(a, FP_STATUS.float_rounding_mode, 1);
+}
+
+uint64_t helper_cvttq_c(uint64_t a)
+{
+    return helper_cvttq_internal(a, float_round_to_zero, 0);
+}
+
+uint64_t helper_cvttq_svic(uint64_t a)
+{
+    return helper_cvttq_internal(a, float_round_to_zero, 1);
 }
 
 uint64_t helper_cvtqt (uint64_t a)
@@ -979,35 +1203,24 @@  uint64_t helper_cvtlq (uint64_t a)
     return (lo & 0x3FFFFFFF) | (hi & 0xc0000000);
 }
 
-static inline uint64_t __helper_cvtql(uint64_t a, int s, int v)
-{
-    uint64_t r;
-
-    r = ((uint64_t)(a & 0xC0000000)) << 32;
-    r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29;
-
-    if (v && (int64_t)((int32_t)r) != (int64_t)r) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
-    }
-    if (s) {
-        /* TODO */
-    }
-    return r;
-}
-
 uint64_t helper_cvtql (uint64_t a)
 {
-    return __helper_cvtql(a, 0, 0);
+    return ((a & 0xC0000000) << 32) | ((a & 0x7FFFFFFF) << 29);
 }
 
-uint64_t helper_cvtqlv (uint64_t a)
+uint64_t helper_cvtql_v (uint64_t a)
 {
-    return __helper_cvtql(a, 0, 1);
+    if ((int32_t)a != (int64_t)a)
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
+    return helper_cvtql(a);
 }
 
-uint64_t helper_cvtqlsv (uint64_t a)
+uint64_t helper_cvtql_sv (uint64_t a)
 {
-    return __helper_cvtql(a, 1, 1);
+    /* ??? I'm pretty sure there's nothing that /sv needs to do that /v
+       doesn't do.  The only thing I can think is that /sv is a valid
+       instruction merely for completeness in the ISA.  */
+    return helper_cvtql_v(a);
 }
 
 /* PALcode support special instructions */
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 515c8c7..a11e5ed 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -33,6 +33,7 @@ 
 #include "helper.h"
 
 #undef ALPHA_DEBUG_DISAS
+#define CONFIG_SOFTFLOAT_INLINE
 
 #ifdef ALPHA_DEBUG_DISAS
 #  define LOG_DISAS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
@@ -49,6 +50,11 @@  struct DisasContext {
 #endif
     CPUAlphaState *env;
     uint32_t amask;
+
+    /* Current rounding mode for this TB.  */
+    int tb_rm;
+    /* Current flush-to-zero setting for this TB.  */
+    int tb_ftz;
 };
 
 /* global register indexes */
@@ -442,62 +448,333 @@  static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int rc)
     gen_set_label(l1);
 }
 
-#define FARITH2(name)                                       \
-static inline void glue(gen_f, name)(int rb, int rc)        \
-{                                                           \
-    if (unlikely(rc == 31))                                 \
-      return;                                               \
-                                                            \
-    if (rb != 31)                                           \
-        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]);    \
-    else {                                                  \
-        TCGv tmp = tcg_const_i64(0);                        \
-        gen_helper_ ## name (cpu_fir[rc], tmp);            \
-        tcg_temp_free(tmp);                                 \
-    }                                                       \
+#define QUAL_RM_N       0x080   /* Round mode nearest even */
+#define QUAL_RM_C       0x000   /* Round mode chopped */
+#define QUAL_RM_M       0x040   /* Round mode minus infinity */
+#define QUAL_RM_D       0x0c0   /* Round mode dynamic */
+#define QUAL_RM_MASK    0x0c0
+
+#define QUAL_U          0x100   /* Underflow enable (fp output) */
+#define QUAL_V          0x100   /* Overflow enable (int output) */
+#define QUAL_S          0x400   /* Software completion enable */
+#define QUAL_I          0x200   /* Inexact detection enable */
+
+static void gen_qual_roundmode(DisasContext *ctx, int fn11)
+{
+    TCGv_i32 tmp;
+
+    fn11 &= QUAL_RM_MASK;
+    if (fn11 == ctx->tb_rm) {
+        return;
+    }
+    ctx->tb_rm = fn11;
+
+    tmp = tcg_temp_new_i32();
+    switch (fn11) {
+    case QUAL_RM_N:
+        tcg_gen_movi_i32(tmp, float_round_nearest_even);
+        break;
+    case QUAL_RM_C:
+        tcg_gen_movi_i32(tmp, float_round_to_zero);
+        break;
+    case QUAL_RM_M:
+        tcg_gen_movi_i32(tmp, float_round_down);
+        break;
+    case QUAL_RM_D:
+        tcg_gen_ld8u_i32(tmp, cpu_env, offsetof(CPUState, fpcr_dyn_round));
+        break;
+    }
+
+#if defined(CONFIG_SOFTFLOAT_INLINE)
+    /* ??? The "softfloat.h" interface is to call set_float_rounding_mode.
+       With CONFIG_SOFTFLOAT that expands to an out-of-line call that just
+       sets the one field.  */
+    tcg_gen_st8_i32(tmp, cpu_env,
+                    offsetof(CPUState, fp_status.float_rounding_mode));
+#else
+    gen_helper_setroundmode(tmp);
+#endif
+
+    tcg_temp_free_i32(tmp);
+}
+
+static void gen_qual_flushzero(DisasContext *ctx, int fn11)
+{
+    TCGv_i32 tmp;
+
+    fn11 &= QUAL_U;
+    if (fn11 == ctx->tb_ftz) {
+        return;
+    }
+    ctx->tb_ftz = fn11;
+
+    tmp = tcg_temp_new_i32();
+    if (fn11) {
+        /* Underflow is enabled, use the FPCR setting.  */
+        tcg_gen_ld8u_i32(tmp, cpu_env, offsetof(CPUState, fpcr_flush_to_zero));
+    } else {
+        /* Underflow is disabled, force flush-to-zero.  */
+        tcg_gen_movi_i32(tmp, 1);
+    }
+
+#if defined(CONFIG_SOFTFLOAT_INLINE)
+    tcg_gen_st8_i32(tmp, cpu_env,
+                    offsetof(CPUState, fp_status.flush_to_zero));
+#else
+    gen_helper_setflushzero(tmp);
+#endif
+
+    tcg_temp_free_i32(tmp);
+}
+
+static TCGv gen_ieee_input(int reg, int fn11, int is_cmp)
+{
+    TCGv val = tcg_temp_new();
+    if (reg == 31) {
+        tcg_gen_movi_i64(val, 0);
+    } else if (fn11 & QUAL_S) {
+        gen_helper_ieee_input_s(val, cpu_fir[reg]);
+    } else if (is_cmp) {
+        gen_helper_ieee_input_cmp(val, cpu_fir[reg]);
+    } else {
+        gen_helper_ieee_input(val, cpu_fir[reg]);
+    }
+    return val;
+}
+
+static void gen_fp_exc_clear(void)
+{
+#if defined(CONFIG_SOFTFLOAT_INLINE)
+    TCGv_i32 zero = tcg_const_i32(0);
+    tcg_gen_st8_i32(zero, cpu_env,
+                    offsetof(CPUState, fp_status.float_exception_flags));
+    tcg_temp_free_i32(zero);
+#else
+    gen_helper_fp_exc_clear();
+#endif
+}
+
+static void gen_fp_exc_raise_ignore(int rc, int fn11, int ignore)
+{
+    /* ??? We ought to be able to do something with imprecise exceptions.
+       E.g. notice we're still in the trap shadow of something within the
+       TB and do not generate the code to signal the exception; end the TB
+       when an exception is forced to arrive, either by consumption of a
+       register value or TRAPB or EXCB.  */
+    TCGv_i32 exc = tcg_temp_new_i32();
+    TCGv_i32 reg;
+
+#if defined(CONFIG_SOFTFLOAT_INLINE)
+    tcg_gen_ld8u_i32(exc, cpu_env,
+                     offsetof(CPUState, fp_status.float_exception_flags));
+#else
+    gen_helper_fp_exc_get(exc);
+#endif
+
+    if (ignore) {
+        tcg_gen_andi_i32(exc, exc, ~ignore);
+    }
+
+    /* ??? Pass in the regno of the destination so that the helper can
+       set EXC_MASK, which contains a bitmask of destination registers
+       that have caused arithmetic traps.  A simple userspace emulation
+       does not require this.  We do need it for a guest kernel's entArith,
+       or if we were to do something clever with imprecise exceptions.  */
+    reg = tcg_const_i32(rc + 32);
+
+    if (fn11 & QUAL_S) {
+        gen_helper_fp_exc_raise_s(exc, reg);
+    } else {
+        gen_helper_fp_exc_raise(exc, reg);
+    }
+
+    tcg_temp_free_i32(reg);
+    tcg_temp_free_i32(exc);
+}
+
+static inline void gen_fp_exc_raise(int rc, int fn11)
+{
+    gen_fp_exc_raise_ignore(rc, fn11, fn11 & QUAL_I ? 0 : float_flag_inexact);
 }
-FARITH2(sqrts)
+
+#define FARITH2(name)                                   \
+static inline void glue(gen_f, name)(int rb, int rc)    \
+{                                                       \
+    if (unlikely(rc == 31)) {                           \
+        return;                                         \
+    }                                                   \
+    if (rb != 31) {                                     \
+        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]); \
+    } else {						\
+        TCGv tmp = tcg_const_i64(0);                    \
+        gen_helper_ ## name (cpu_fir[rc], tmp);         \
+        tcg_temp_free(tmp);                             \
+    }                                                   \
+}
+FARITH2(cvtlq)
+FARITH2(cvtql)
+FARITH2(cvtql_v)
+FARITH2(cvtql_sv)
+
+/* ??? VAX instruction qualifiers ignored.  */
 FARITH2(sqrtf)
 FARITH2(sqrtg)
-FARITH2(sqrtt)
 FARITH2(cvtgf)
 FARITH2(cvtgq)
 FARITH2(cvtqf)
 FARITH2(cvtqg)
-FARITH2(cvtst)
-FARITH2(cvtts)
-FARITH2(cvttq)
-FARITH2(cvtqs)
-FARITH2(cvtqt)
-FARITH2(cvtlq)
-FARITH2(cvtql)
-FARITH2(cvtqlv)
-FARITH2(cvtqlsv)
-
-#define FARITH3(name)                                                     \
-static inline void glue(gen_f, name)(int ra, int rb, int rc)              \
-{                                                                         \
-    if (unlikely(rc == 31))                                               \
-        return;                                                           \
-                                                                          \
-    if (ra != 31) {                                                       \
-        if (rb != 31)                                                     \
-            gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], cpu_fir[rb]);  \
-        else {                                                            \
-            TCGv tmp = tcg_const_i64(0);                                  \
-            gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], tmp);          \
-            tcg_temp_free(tmp);                                           \
-        }                                                                 \
-    } else {                                                              \
-        TCGv tmp = tcg_const_i64(0);                                      \
-        if (rb != 31)                                                     \
-            gen_helper_ ## name (cpu_fir[rc], tmp, cpu_fir[rb]);          \
-        else                                                              \
-            gen_helper_ ## name (cpu_fir[rc], tmp, tmp);                   \
-        tcg_temp_free(tmp);                                               \
-    }                                                                     \
+
+static void gen_ieee_arith2(DisasContext *ctx, void (*helper)(TCGv, TCGv),
+                            int rb, int rc, int fn11)
+{
+    TCGv vb;
+
+    /* ??? This is wrong: the instruction is not a nop, it still may
+       raise exceptions.  */
+    if (unlikely(rc == 31)) {
+        return;
+    }
+
+    gen_qual_roundmode(ctx, fn11);
+    gen_qual_flushzero(ctx, fn11);
+    gen_fp_exc_clear();
+
+    vb = gen_ieee_input(rb, fn11, 0);
+    helper(cpu_fir[rc], vb);
+    tcg_temp_free(vb);
+
+    gen_fp_exc_raise(rc, fn11);
+}
+
+#define IEEE_ARITH2(name)                                       \
+static inline void glue(gen_f, name)(DisasContext *ctx,         \
+                                     int rb, int rc, int fn11)  \
+{                                                               \
+    gen_ieee_arith2(ctx, gen_helper_##name, rb, rc, fn11);      \
+}
+IEEE_ARITH2(sqrts)
+IEEE_ARITH2(sqrtt)
+IEEE_ARITH2(cvtst)
+IEEE_ARITH2(cvtts)
+
+static void gen_fcvttq(DisasContext *ctx, int rb, int rc, int fn11)
+{
+    TCGv vb;
+    int ignore = 0;
+
+    /* ??? This is wrong: the instruction is not a nop, it still may
+       raise exceptions.  */
+    if (unlikely(rc == 31)) {
+        return;
+    }
+
+    /* No need to set flushzero, since we have an integer output.  */
+    gen_fp_exc_clear();
+    vb = gen_ieee_input(rb, fn11, 0);
+
+    /* Almost all integer conversions use cropped rounding, and most
+       also do not have integer overflow enabled.  Special case that.  */
+    switch (fn11) {
+    case QUAL_RM_C:
+        gen_helper_cvttq_c(cpu_fir[rc], vb);
+        break;
+    case QUAL_V | QUAL_RM_C:
+    case QUAL_S | QUAL_V | QUAL_RM_C:
+        ignore = float_flag_inexact;
+        /* FALLTHRU */
+    case QUAL_S | QUAL_V | QUAL_I | QUAL_RM_C:
+        gen_helper_cvttq_svic(cpu_fir[rc], vb);
+        break;
+    default:
+        gen_qual_roundmode(ctx, fn11);
+        gen_helper_cvttq(cpu_fir[rc], vb);
+        ignore |= (fn11 & QUAL_V ? 0 : float_flag_overflow);
+        ignore |= (fn11 & QUAL_I ? 0 : float_flag_inexact);
+        break;
+    }
+    tcg_temp_free(vb);
+
+    gen_fp_exc_raise_ignore(rc, fn11, ignore);
 }
 
+static void gen_ieee_intcvt(DisasContext *ctx, void (*helper)(TCGv, TCGv),
+			    int rb, int rc, int fn11)
+{
+    TCGv vb;
+
+    /* ??? This is wrong: the instruction is not a nop, it still may
+       raise exceptions.  */
+    if (unlikely(rc == 31)) {
+        return;
+    }
+
+    gen_qual_roundmode(ctx, fn11);
+
+    if (rb == 31) {
+        vb = tcg_const_i64(0);
+    } else {
+        vb = cpu_fir[rb];
+    }
+
+    /* The only exception that can be raised by integer conversion
+       is inexact.  Thus we only need to worry about exceptions when
+       inexact handling is requested.  */
+    if (fn11 & QUAL_I) {
+        gen_fp_exc_clear();
+        helper(cpu_fir[rc], vb);
+        gen_fp_exc_raise(rc, fn11);
+    } else {
+        helper(cpu_fir[rc], vb);
+    }
+
+    if (rb == 31) {
+        tcg_temp_free(vb);
+    }
+}
+
+#define IEEE_INTCVT(name)                                       \
+static inline void glue(gen_f, name)(DisasContext *ctx,         \
+                                     int rb, int rc, int fn11)  \
+{                                                               \
+    gen_ieee_intcvt(ctx, gen_helper_##name, rb, rc, fn11);      \
+}
+IEEE_INTCVT(cvtqs)
+IEEE_INTCVT(cvtqt)
+
+#define FARITH3(name)                                           \
+static inline void glue(gen_f, name)(int ra, int rb, int rc)    \
+{                                                               \
+    TCGv va, vb;                                                \
+                                                                \
+    if (unlikely(rc == 31)) {                                   \
+        return;                                                 \
+    }                                                           \
+    if (ra == 31) {                                             \
+        va = tcg_const_i64(0);                                  \
+    } else {                                                    \
+        va = cpu_fir[ra];                                       \
+    }                                                           \
+    if (rb == 31) {                                             \
+        vb = tcg_const_i64(0);                                  \
+    } else {                                                    \
+        vb = cpu_fir[rb];                                       \
+    }                                                           \
+                                                                \
+    gen_helper_ ## name (cpu_fir[rc], va, vb);                  \
+                                                                \
+    if (ra == 31) {                                             \
+        tcg_temp_free(va);                                      \
+    }                                                           \
+    if (rb == 31) {                                             \
+        tcg_temp_free(vb);                                      \
+    }                                                           \
+}
+/* ??? Ought to expand these inline; simple masking operations.  */
+FARITH3(cpys)
+FARITH3(cpysn)
+FARITH3(cpyse)
+
+/* ??? VAX instruction qualifiers ignored.  */
 FARITH3(addf)
 FARITH3(subf)
 FARITH3(mulf)
@@ -509,21 +786,80 @@  FARITH3(divg)
 FARITH3(cmpgeq)
 FARITH3(cmpglt)
 FARITH3(cmpgle)
-FARITH3(adds)
-FARITH3(subs)
-FARITH3(muls)
-FARITH3(divs)
-FARITH3(addt)
-FARITH3(subt)
-FARITH3(mult)
-FARITH3(divt)
-FARITH3(cmptun)
-FARITH3(cmpteq)
-FARITH3(cmptlt)
-FARITH3(cmptle)
-FARITH3(cpys)
-FARITH3(cpysn)
-FARITH3(cpyse)
+
+static void gen_ieee_arith3(DisasContext *ctx,
+                            void (*helper)(TCGv, TCGv, TCGv),
+                            int ra, int rb, int rc, int fn11)
+{
+    TCGv va, vb;
+
+    /* ??? This is wrong: the instruction is not a nop, it still may
+       raise exceptions.  */
+    if (unlikely(rc == 31)) {
+        return;
+    }
+
+    gen_qual_roundmode(ctx, fn11);
+    gen_qual_flushzero(ctx, fn11);
+    gen_fp_exc_clear();
+
+    va = gen_ieee_input(ra, fn11, 0);
+    vb = gen_ieee_input(rb, fn11, 0);
+    helper(cpu_fir[rc], va, vb);
+    tcg_temp_free(va);
+    tcg_temp_free(vb);
+
+    gen_fp_exc_raise(rc, fn11);
+}
+
+#define IEEE_ARITH3(name)                                               \
+static inline void glue(gen_f, name)(DisasContext *ctx,                 \
+                                     int ra, int rb, int rc, int fn11)  \
+{                                                                       \
+    gen_ieee_arith3(ctx, gen_helper_##name, ra, rb, rc, fn11);          \
+}
+IEEE_ARITH3(adds)
+IEEE_ARITH3(subs)
+IEEE_ARITH3(muls)
+IEEE_ARITH3(divs)
+IEEE_ARITH3(addt)
+IEEE_ARITH3(subt)
+IEEE_ARITH3(mult)
+IEEE_ARITH3(divt)
+
+static void gen_ieee_compare(DisasContext *ctx,
+                             void (*helper)(TCGv, TCGv, TCGv),
+                             int ra, int rb, int rc, int fn11)
+{
+    TCGv va, vb;
+
+    /* ??? This is wrong: the instruction is not a nop, it still may
+       raise exceptions.  */
+    if (unlikely(rc == 31)) {
+        return;
+    }
+
+    gen_fp_exc_clear();
+
+    va = gen_ieee_input(ra, fn11, 1);
+    vb = gen_ieee_input(rb, fn11, 1);
+    helper(cpu_fir[rc], va, vb);
+    tcg_temp_free(va);
+    tcg_temp_free(vb);
+
+    gen_fp_exc_raise(rc, fn11);
+}
+
+#define IEEE_CMP3(name)                                                 \
+static inline void glue(gen_f, name)(DisasContext *ctx,                 \
+                                     int ra, int rb, int rc, int fn11)  \
+{                                                                       \
+    gen_ieee_compare(ctx, gen_helper_##name, ra, rb, rc, fn11);         \
+}
+IEEE_CMP3(cmptun)
+IEEE_CMP3(cmpteq)
+IEEE_CMP3(cmptlt)
+IEEE_CMP3(cmptle)
 
 static inline uint64_t zapnot_mask(uint8_t lit)
 {
@@ -1607,7 +1943,7 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
         }
         break;
     case 0x14:
-        switch (fpfn) { /* f11 & 0x3F */
+        switch (fpfn) { /* fn11 & 0x3F */
         case 0x04:
             /* ITOFS */
             if (!(ctx->amask & AMASK_FIX))
@@ -1632,7 +1968,7 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
             /* SQRTS */
             if (!(ctx->amask & AMASK_FIX))
                 goto invalid_opc;
-            gen_fsqrts(rb, rc);
+            gen_fsqrts(ctx, rb, rc, fn11);
             break;
         case 0x14:
             /* ITOFF */
@@ -1669,7 +2005,7 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
             /* SQRTT */
             if (!(ctx->amask & AMASK_FIX))
                 goto invalid_opc;
-            gen_fsqrtt(rb, rc);
+            gen_fsqrtt(ctx, rb, rc, fn11);
             break;
         default:
             goto invalid_opc;
@@ -1678,7 +2014,7 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
     case 0x15:
         /* VAX floating point */
         /* XXX: rounding mode and trap are ignored (!) */
-        switch (fpfn) { /* f11 & 0x3F */
+        switch (fpfn) { /* fn11 & 0x3F */
         case 0x00:
             /* ADDF */
             gen_faddf(ra, rb, rc);
@@ -1761,77 +2097,75 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
         break;
     case 0x16:
         /* IEEE floating-point */
-        /* XXX: rounding mode and traps are ignored (!) */
-        switch (fpfn) { /* f11 & 0x3F */
+        switch (fpfn) { /* fn11 & 0x3F */
         case 0x00:
             /* ADDS */
-            gen_fadds(ra, rb, rc);
+            gen_fadds(ctx, ra, rb, rc, fn11);
             break;
         case 0x01:
             /* SUBS */
-            gen_fsubs(ra, rb, rc);
+            gen_fsubs(ctx, ra, rb, rc, fn11);
             break;
         case 0x02:
             /* MULS */
-            gen_fmuls(ra, rb, rc);
+            gen_fmuls(ctx, ra, rb, rc, fn11);
             break;
         case 0x03:
             /* DIVS */
-            gen_fdivs(ra, rb, rc);
+            gen_fdivs(ctx, ra, rb, rc, fn11);
             break;
         case 0x20:
             /* ADDT */
-            gen_faddt(ra, rb, rc);
+            gen_faddt(ctx, ra, rb, rc, fn11);
             break;
         case 0x21:
             /* SUBT */
-            gen_fsubt(ra, rb, rc);
+            gen_fsubt(ctx, ra, rb, rc, fn11);
             break;
         case 0x22:
             /* MULT */
-            gen_fmult(ra, rb, rc);
+            gen_fmult(ctx, ra, rb, rc, fn11);
             break;
         case 0x23:
             /* DIVT */
-            gen_fdivt(ra, rb, rc);
+            gen_fdivt(ctx, ra, rb, rc, fn11);
             break;
         case 0x24:
             /* CMPTUN */
-            gen_fcmptun(ra, rb, rc);
+            gen_fcmptun(ctx, ra, rb, rc, fn11);
             break;
         case 0x25:
             /* CMPTEQ */
-            gen_fcmpteq(ra, rb, rc);
+            gen_fcmpteq(ctx, ra, rb, rc, fn11);
             break;
         case 0x26:
             /* CMPTLT */
-            gen_fcmptlt(ra, rb, rc);
+            gen_fcmptlt(ctx, ra, rb, rc, fn11);
             break;
         case 0x27:
             /* CMPTLE */
-            gen_fcmptle(ra, rb, rc);
+            gen_fcmptle(ctx, ra, rb, rc, fn11);
             break;
         case 0x2C:
-            /* XXX: incorrect */
             if (fn11 == 0x2AC || fn11 == 0x6AC) {
                 /* CVTST */
-                gen_fcvtst(rb, rc);
+                gen_fcvtst(ctx, rb, rc, fn11);
             } else {
                 /* CVTTS */
-                gen_fcvtts(rb, rc);
+                gen_fcvtts(ctx, rb, rc, fn11);
             }
             break;
         case 0x2F:
             /* CVTTQ */
-            gen_fcvttq(rb, rc);
+            gen_fcvttq(ctx, rb, rc, fn11);
             break;
         case 0x3C:
             /* CVTQS */
-            gen_fcvtqs(rb, rc);
+            gen_fcvtqs(ctx, rb, rc, fn11);
             break;
         case 0x3E:
             /* CVTQT */
-            gen_fcvtqt(rb, rc);
+            gen_fcvtqt(ctx, rb, rc, fn11);
             break;
         default:
             goto invalid_opc;
@@ -1910,11 +2244,11 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
             break;
         case 0x130:
             /* CVTQL/V */
-            gen_fcvtqlv(rb, rc);
+            gen_fcvtql_v(rb, rc);
             break;
         case 0x530:
             /* CVTQL/SV */
-            gen_fcvtqlsv(rb, rc);
+            gen_fcvtql_sv(rb, rc);
             break;
         default:
             goto invalid_opc;
@@ -2597,6 +2931,17 @@  static inline void gen_intermediate_code_internal(CPUState *env,
     ctx.mem_idx = ((env->ps >> 3) & 3);
     ctx.pal_mode = env->ipr[IPR_EXC_ADDR] & 1;
 #endif
+
+    /* ??? Every TB begins with unset rounding mode, to be initialized on
+       the first fp insn of the TB.  Alternately we could define a proper
+       default for every TB (e.g. QUAL_RM_N or QUAL_RM_D) and make sure
+       to reset the FP_STATUS to that default at the end of any TB that
+       changes the default.  We could even (gasp) dynamiclly figure out
+       what default would be most efficient given the running program.  */
+    ctx.tb_rm = -1;
+    /* Similarly for flush-to-zero.  */
+    ctx.tb_ftz = -1;
+
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0)