diff mbox

target-alpha: An approach to fp insn qualifiers

Message ID 4B26D8EF.10801@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Dec. 15, 2009, 12:31 a.m. UTC
On 12/14/2009 12:11 PM, Laurent Desnogues wrote:
> I'll take a closer look at your patch tomorrow.

For the record, I believe this finishes what I had in mind for the
exception handling there in op_helper.c.


r~
commit ce6c2abc1d5d437dde980b4addc7da0f0f5de252
Author: Richard Henderson <rth@twiddle.net>
Date:   Mon Dec 14 16:27:39 2009 -0800

    target-alpha: Implement arithmetic exceptions for IEEE fp.
diff mbox

Patch

diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index c0dff4b..c1c0470 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -430,9 +430,13 @@  enum {
 };
 
 /* Arithmetic exception */
-enum {
-    EXCP_ARITH_OVERFLOW,
-};
+#define EXC_M_IOV	(1<<16)		/* Integer Overflow */
+#define EXC_M_INE	(1<<15)		/* Inexact result */
+#define EXC_M_UNF	(1<<14)		/* Underflow */
+#define EXC_M_FOV	(1<<13)		/* Overflow */
+#define EXC_M_DZE	(1<<12)		/* Division by zero */
+#define EXC_M_INV	(1<<11)		/* Invalid operation */
+#define EXC_M_SWC	(1<<10)		/* Software completion */
 
 enum {
     IR_V0   = 0,
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index a658f97..a29f785 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -27,41 +27,13 @@ 
 
 uint64_t cpu_alpha_load_fpcr (CPUState *env)
 {
-    uint64_t ret = 0;
-    int flags, mask;
-
-    flags = env->fp_status.float_exception_flags;
-    ret |= (uint64_t) flags << 52;
-    if (flags)
-        ret |= FPCR_SUM;
-    env->ipr[IPR_EXC_SUM] &= ~0x3E;
-    env->ipr[IPR_EXC_SUM] |= flags << 1;
-
-    mask = env->fp_status.float_exception_mask;
-    if (mask & float_flag_invalid)
-        ret |= FPCR_INVD;
-    if (mask & float_flag_divbyzero)
-        ret |= FPCR_DZED;
-    if (mask & float_flag_overflow)
-        ret |= FPCR_OVFD;
-    if (mask & float_flag_underflow)
-        ret |= FPCR_UNFD;
-    if (mask & float_flag_inexact)
-        ret |= FPCR_INED;
-
-    switch (env->fp_status.float_rounding_mode) {
-    case float_round_nearest_even:
-        ret |= 2ULL << FPCR_DYN_SHIFT;
-        break;
-    case float_round_down:
-        ret |= 1ULL << FPCR_DYN_SHIFT;
-        break;
-    case float_round_up:
-        ret |= 3ULL << FPCR_DYN_SHIFT;
-        break;
-    case float_round_to_zero:
-        break;
-    }
+    uint64_t ret = env->fp_status.float_exception_flags;
+
+    if (ret)
+      ret = FPCR_SUM | (ret << 52);
+
+    ret |= env->fpcr & ~(FPCR_SUM | FPCR_STATUS_MASK);
+
     return ret;
 }
 
@@ -69,6 +41,8 @@  void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
 {
     int round_mode, mask;
 
+    env->fpcr = val;
+
     set_float_exception_flags((val >> 52) & 0x3F, &env->fp_status);
 
     mask = 0;
@@ -86,6 +60,7 @@  void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
 
     switch ((val >> FPCR_DYN_SHIFT) & 3) {
     case 0:
+    default:
         round_mode = float_round_to_zero;
         break;
     case 1:
@@ -99,6 +74,11 @@  void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
         break;
     }
     set_float_rounding_mode(round_mode, &env->fp_status);
+
+    mask = 0;
+    if ((val & (FPCR_UNDZ|FPCR_UNFD)) == (FPCR_UNDZ|FPCR_UNFD))
+        mask = 1;
+    set_flush_to_zero(mask, &env->fp_status);
 }
 
 #if defined(CONFIG_USER_ONLY)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index 3bb0020..d031f56 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -78,7 +78,7 @@  uint64_t helper_addqv (uint64_t op1, uint64_t op2)
     uint64_t tmp = op1;
     op1 += op2;
     if (unlikely((tmp ^ op2 ^ (-1ULL)) & (tmp ^ op1) & (1ULL << 63))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return op1;
 }
@@ -88,7 +88,7 @@  uint64_t helper_addlv (uint64_t op1, uint64_t op2)
     uint64_t tmp = op1;
     op1 = (uint32_t)(op1 + op2);
     if (unlikely((tmp ^ op2 ^ (-1UL)) & (tmp ^ op1) & (1UL << 31))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return op1;
 }
@@ -98,7 +98,7 @@  uint64_t helper_subqv (uint64_t op1, uint64_t op2)
     uint64_t res;
     res = op1 - op2;
     if (unlikely((op1 ^ op2) & (res ^ op1) & (1ULL << 63))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return res;
 }
@@ -108,7 +108,7 @@  uint64_t helper_sublv (uint64_t op1, uint64_t op2)
     uint32_t res;
     res = op1 - op2;
     if (unlikely((op1 ^ op2) & (res ^ op1) & (1UL << 31))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return res;
 }
@@ -118,7 +118,7 @@  uint64_t helper_mullv (uint64_t op1, uint64_t op2)
     int64_t res = (int64_t)op1 * (int64_t)op2;
 
     if (unlikely((int32_t)res != res)) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return (int64_t)((int32_t)res);
 }
@@ -130,7 +130,7 @@  uint64_t helper_mulqv (uint64_t op1, uint64_t op2)
     muls64(&tl, &th, op1, op2);
     /* If th != 0 && th != -1, then we had an overflow */
     if (unlikely((th + 1) > 1)) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return tl;
 }
@@ -370,87 +370,175 @@  uint64_t helper_unpkbw (uint64_t op1)
 
 /* Floating point helpers */
 
+/* ??? Not implemented is setting EXC_MASK, containing a bitmask of
+   destination registers of instructions that have caused arithmetic
+   traps.  Not needed for userspace emulation, or for complete 
+   emulation of the entire fpu stack within qemu.  But we would need
+   it to invoke a guest kernel's entArith trap handler properly.
+   
+   It would be possible to encode the FP destination register in the
+   QUAL parameter for the FPU helpers below; additional changes would
+   be required for ADD/V et al above.  */
+
+#define QUAL_RM_N	0x080	/* Round mode nearest even */
+#define QUAL_RM_C	0x000	/* Round mode chopped */
+#define QUAL_RM_M	0x040	/* Round mode minus infinity */
+#define QUAL_RM_D	0x0c0	/* Round mode dynamic */
+#define QUAL_RM_MASK	0x0c0
+
+#define QUAL_U		0x100	/* Underflow enable (fp output) */
+#define QUAL_V		0x100	/* Overflow enable (int output) */
+#define QUAL_S		0x400	/* Software completion enable */
+#define QUAL_I		0x200	/* Inexact detection enable */
+
 /* If the floating-point qualifiers specified a rounding mode,
    set that rounding mode and remember the original mode for
    resetting at the end of the instruction.  */
-static inline uint32_t begin_quals_roundmode(uint32_t qual)
+static inline uint32_t begin_fp_roundmode(uint32_t qual)
 {
     uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = rm;
 
-    switch (qual & 0xc0) {
-    case 0x80:
+    switch (qual & QUAL_RM_MASK) {
+    default:
+    case QUAL_RM_N:
         rm = float_round_nearest_even;
         break;
-    case 0x00:
+    case QUAL_RM_C:
         rm = float_round_to_zero;
         break;
-    case 0x40:
+    case QUAL_RM_M:
         rm = float_round_down;
         break;
-    case 0xc0:
+    case QUAL_RM_D:
         return old_rm;
     }
-    set_float_rounding_mode(rm, &FP_STATUS);
+    if (old_rm != rm)
+        set_float_rounding_mode(rm, &FP_STATUS);
     return old_rm;
 }
 
-/* If the floating-point qualifiers specified extra exception processing
-   (i.e. /u or /su), zero the exception flags so that we can determine if
-   the current instruction raises any exceptions.  Save the old acrued
-   exception status so that we can restore them at the end of the insn.  */
-static inline uint32_t begin_quals_exception(uint32_t qual)
+/* Zero the exception flags so that we can determine if the current
+   instruction raises any exceptions.  Save the old accrued exception
+   status so that we can restore them at the end of the insn.  */
+static inline uint32_t begin_fp_exception(void)
 {
-    uint32_t old_exc = 0;
-    if (qual & 0x500) {
-        old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8;
-        set_float_exception_flags(0, &FP_STATUS);
-    }
+    uint32_t old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8;
+    set_float_exception_flags(0, &FP_STATUS);
     return old_exc;
 }
 
+static inline uint32_t begin_fp_flush_to_zero(uint32_t quals)
+{
+    /* If underflow detection is disabled, silently flush to zero.
+       Note that flush-to-zero mode may already be enabled via the FPCR.  */
+    if ((quals & QUAL_U) == 0 && !FP_STATUS.flush_to_zero) {
+        set_flush_to_zero(1, &FP_STATUS);
+        return 0x10000;
+    }
+    return 0;
+}
+
 /* Begin processing an fp operation.  Return a token that should be passed
    when completing the fp operation.  */
-static inline uint32_t begin_fp(uint32_t quals)
+static uint32_t begin_fp(uint32_t quals)
 {
     uint32_t ret = 0;
 
-    ret |= begin_quals_roundmode(quals);
-    ret |= begin_quals_exception(quals);
+    ret |= begin_fp_roundmode(quals);
+    ret |= begin_fp_flush_to_zero(quals);
+    ret |= begin_fp_exception();
 
     return ret;
 }
 
 /* End processing an fp operation.  */
-static inline void end_fp(uint32_t quals, uint32_t orig)
-{
-    uint8_t exc = FP_STATUS.float_exception_flags;
 
-    set_float_exception_flags(exc | (orig >> 8), &FP_STATUS);
-    set_float_rounding_mode(orig & 0xff, &FP_STATUS);
+static inline void end_fp_roundmode(uint32_t orig)
+{
+    uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = orig & 0xff;
+    if (unlikely(rm != old_rm))
+        set_float_rounding_mode(old_rm, &FP_STATUS);
+}
 
-    /* TODO: check quals and exc and raise any exceptions needed.  */
+static inline void end_fp_flush_to_zero(uint32_t orig)
+{
+    if (orig & 0x10000)
+        set_flush_to_zero(0, &FP_STATUS);
 }
 
-/* Raise any exceptions needed for using F, given the insn qualifiers.  */
-static inline void float32_input(uint32_t quals, float32 f)
+static void end_fp_exception(uint32_t quals, uint32_t orig)
 {
-    /* If /s is used, no exceptions are raised immediately.  */
-    /* ??? This for userspace only.  If we are emulating the real hw, then
-       we may well need to trap to the kernel for software emulation.  */
-    /* ??? Shouldn't we raise an exception for SNAN?  */
-    if (quals & 0x500)
-        return;
-    /* TODO: Check for inf, nan, denormal and trap.  */
+    uint8_t exc = FP_STATUS.float_exception_flags;
+
+    /* If inexact detection is disabled, silently clear it.  */
+    if ((quals & QUAL_I) == 0)
+        exc &= ~float_flag_inexact;
+
+    orig = (orig >> 8) & 0xff;
+    set_float_exception_flags(exc | orig, &FP_STATUS);
+
+    /* Raise an exception as required.  */
+    if (unlikely(exc)) {
+        if (quals & QUAL_S)
+            exc &= ~FP_STATUS.float_exception_mask;
+        if (exc) {
+            uint32_t hw_exc = 0;
+
+            if (exc & float_flag_invalid)
+                hw_exc |= EXC_M_INV;
+            if (exc & float_flag_divbyzero)
+                hw_exc |= EXC_M_DZE;
+            if (exc & float_flag_overflow)
+                hw_exc |= EXC_M_FOV;
+            if (exc & float_flag_underflow)
+                hw_exc |= EXC_M_UNF;
+            if (exc & float_flag_inexact)
+                hw_exc |= EXC_M_INE;
+
+            helper_excp(EXCP_ARITH, hw_exc);
+        }
+    }
 }
 
-static inline void float64_input(uint32_t quals, float64 f)
+static void end_fp(uint32_t quals, uint32_t orig)
 {
-    /* TODO: Exactly like above, except for float64.  */
+    end_fp_roundmode(orig);
+    end_fp_flush_to_zero(orig);
+    end_fp_exception(quals, orig);
 }
 
+static uint64_t remap_ieee_input(uint32_t quals, uint64_t a)
+{
+    uint64_t frac;
+    uint32_t exp;
+
+    exp = (uint32_t)(a >> 52) & 0x7ff;
+    frac = a & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (frac != 0) {
+            /* If DNZ is set, flush denormals to zero on input.  */
+            if (env->fpcr & FPCR_DNZ)
+                a = a & (1ull << 63);
+            /* If software completion not enabled, trap.  */
+            else if ((quals & QUAL_S) == 0)
+                helper_excp(EXCP_ARITH, EXC_M_UNF);
+        }
+    } else if (exp == 0x7ff) {
+        /* Infinity or NaN.  If software completion is not enabled, trap.
+           If /s is enabled, we'll properly signal for SNaN on output.  */
+        /* ??? I'm not sure these exception bit flags are correct.  I do
+           know that the Linux kernel, at least, doesn't rely on them and
+           just emulates the insn to figure out what exception to use.  */
+        if ((quals & QUAL_S) == 0)
+            helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV);
+    }
+
+    return a;
+}
 
 /* F floating (VAX) */
-static inline uint64_t float32_to_f(float32 fa)
+static uint64_t float32_to_f(float32 fa)
 {
     uint64_t r, exp, mant, sig;
     CPU_FloatU a;
@@ -483,7 +571,7 @@  static inline uint64_t float32_to_f(float32 fa)
     return r;
 }
 
-static inline float32 f_to_float32(uint64_t a)
+static float32 f_to_float32(uint64_t a)
 {
     uint32_t exp, mant_sig;
     CPU_FloatU r;
@@ -535,8 +623,6 @@  uint64_t helper_addf (uint64_t a, uint64_t b, uint32_t quals)
     fb = f_to_float32(b);
 
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
     fr = float32_add(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -552,8 +638,6 @@  uint64_t helper_subf (uint64_t a, uint64_t b, uint32_t quals)
     fb = f_to_float32(b);
 
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
     fr = float32_sub(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -569,8 +653,6 @@  uint64_t helper_mulf (uint64_t a, uint64_t b, uint32_t quals)
     fb = f_to_float32(b);
 
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
     fr = float32_mul(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -586,8 +668,6 @@  uint64_t helper_divf (uint64_t a, uint64_t b, uint32_t quals)
     fb = f_to_float32(b);
 
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
     fr = float32_div(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -602,7 +682,6 @@  uint64_t helper_sqrtf (uint64_t t, uint32_t quals)
     ft = f_to_float32(t);
 
     token = begin_fp(quals);
-    float32_input(quals, ft);
     fr = float32_sqrt(ft, &FP_STATUS);
     end_fp(quals, token);
 
@@ -611,7 +690,7 @@  uint64_t helper_sqrtf (uint64_t t, uint32_t quals)
 
 
 /* G floating (VAX) */
-static inline uint64_t float64_to_g(float64 fa)
+static uint64_t float64_to_g(float64 fa)
 {
     uint64_t r, exp, mant, sig;
     CPU_DoubleU a;
@@ -644,7 +723,7 @@  static inline uint64_t float64_to_g(float64 fa)
     return r;
 }
 
-static inline float64 g_to_float64(uint64_t a)
+static float64 g_to_float64(uint64_t a)
 {
     uint64_t exp, mant_sig;
     CPU_DoubleU r;
@@ -696,8 +775,6 @@  uint64_t helper_addg (uint64_t a, uint64_t b, uint32_t quals)
     fb = g_to_float64(b);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
     fr = float64_add(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -713,8 +790,6 @@  uint64_t helper_subg (uint64_t a, uint64_t b, uint32_t quals)
     fb = g_to_float64(b);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
     fr = float64_sub(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -730,8 +805,6 @@  uint64_t helper_mulg (uint64_t a, uint64_t b, uint32_t quals)
     fb = g_to_float64(b);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
     fr = float64_mul(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -747,8 +820,6 @@  uint64_t helper_divg (uint64_t a, uint64_t b, uint32_t quals)
     fb = g_to_float64(b);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
     fr = float64_div(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -763,7 +834,6 @@  uint64_t helper_sqrtg (uint64_t a, uint32_t quals)
     fa = g_to_float64(a);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
     fr = float64_sqrt(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -774,7 +844,7 @@  uint64_t helper_sqrtg (uint64_t a, uint32_t quals)
 /* S floating (single) */
 
 /* Taken from linux/arch/alpha/kernel/traps.c, s_mem_to_reg.  */
-static inline uint64_t float32_to_s_int(uint32_t fi)
+static uint64_t float32_to_s_int(uint32_t fi)
 {
     uint32_t frac = fi & 0x7fffff;
     uint32_t sign = fi >> 31;
@@ -796,7 +866,7 @@  static inline uint64_t float32_to_s_int(uint32_t fi)
             | ((uint64_t)frac << 29));
 }
 
-static inline uint64_t float32_to_s(float32 fa)
+static uint64_t float32_to_s(float32 fa)
 {
     CPU_FloatU a;
     a.f = fa;
@@ -825,17 +895,19 @@  uint64_t helper_memory_to_s (uint32_t a)
     return float32_to_s_int(a);
 }
 
+static float32 input_s(uint32_t quals, uint64_t a)
+{
+    return s_to_float32(remap_ieee_input(quals, a));
+}
+
 uint64_t helper_adds (uint64_t a, uint64_t b, uint32_t quals)
 {
     float32 fa, fb, fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_add(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -847,12 +919,9 @@  uint64_t helper_subs (uint64_t a, uint64_t b, uint32_t quals)
     float32 fa, fb, fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_sub(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -864,12 +933,9 @@  uint64_t helper_muls (uint64_t a, uint64_t b, uint32_t quals)
     float32 fa, fb, fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_mul(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -881,12 +947,9 @@  uint64_t helper_divs (uint64_t a, uint64_t b, uint32_t quals)
     float32 fa, fb, fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_div(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -898,10 +961,8 @@  uint64_t helper_sqrts (uint64_t a, uint32_t quals)
     float32 fa, fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
+    fa = input_s(quals, a);
     fr = float32_sqrt(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -926,17 +987,20 @@  static inline uint64_t float64_to_t(float64 fa)
     return r.ll;
 }
 
+/* Raise any exceptions needed for using A, given the insn qualifiers.  */
+static float64 input_t(uint32_t quals, uint64_t a)
+{
+    return t_to_float64(remap_ieee_input(quals, a));
+}
+
 uint64_t helper_addt (uint64_t a, uint64_t b, uint32_t quals)
 {
     float64 fa, fb, fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_add(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -948,12 +1012,9 @@  uint64_t helper_subt (uint64_t a, uint64_t b, uint32_t quals)
     float64 fa, fb, fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_sub(fa, fb, &FP_STATUS);
     end_fp(quals, token);
     
@@ -965,12 +1026,9 @@  uint64_t helper_mult (uint64_t a, uint64_t b, uint32_t quals)
     float64 fa, fb, fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_mul(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -982,12 +1040,9 @@  uint64_t helper_divt (uint64_t a, uint64_t b, uint32_t quals)
     float64 fa, fb, fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_div(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -999,10 +1054,8 @@  uint64_t helper_sqrtt (uint64_t a, uint32_t quals)
     float64 fa, fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
+    fa = input_t(quals, a);
     fr = float64_sqrt(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -1028,6 +1081,8 @@  uint64_t helper_cpyse(uint64_t a, uint64_t b)
 
 
 /* Comparisons */
+/* ??? Software completion qualifier missing.  */
+
 uint64_t helper_cmptun (uint64_t a, uint64_t b)
 {
     float64 fa, fb;
@@ -1126,10 +1181,8 @@  uint64_t helper_cvtts (uint64_t a, uint32_t quals)
     float32 fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
+    fa = input_t(quals, a);
     fr = float64_to_float32(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -1142,10 +1195,8 @@  uint64_t helper_cvtst (uint64_t a, uint32_t quals)
     float64 fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
+    fa = input_s(quals, a);
     fr = float32_to_float64(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -1164,115 +1215,125 @@  uint64_t helper_cvtqs (uint64_t a, uint32_t quals)
     return float32_to_s(fr);
 }
 
-uint64_t helper_cvttq (uint64_t a, uint32_t quals)
-{
-    uint64_t ret, frac;
-    uint32_t token, exp, sign, exc = 0;
-
-    token = begin_fp(quals);
+/* Implement float64 to uint64 conversion without overflow enabled.
+   In this mode we must supply the truncated result.  This behaviour
+   is used by the compiler to get unsigned conversion for free with
+   the same instruction.  */
 
-    /* Alpha integer conversion does not saturate, as the generic routine
-       does.  Instead it supplies a truncated result.  This fact is relied
-       upon by GCC in that without overflow enabled we can get unsigned
-       conversion for free with the same instruction.  */
+static uint64_t cvttq_noqual_internal(uint64_t a, uint32_t rounding_mode)
+{
+    uint64_t frac, ret = 0;
+    uint32_t exp, sign;
+    int shift;
 
     sign = (a >> 63);
     exp = (uint32_t)(a >> 52) & 0x7ff;
     frac = a & 0xfffffffffffffull;
 
-    if (exp == 0) {
-        ret = 0;
-        if (frac != 0) {
-            /* ??? If DNZ set, map to zero without trapping.  */
-            /* ??? Figure out what kind of exception signal to send.  */
-            if (!(quals & 0x400))
-                helper_excp(EXCP_ARITH, 0);
-            goto do_underflow;
-        }
-    } else if (exp == 0x7ff) {
-        /* In keeping with the truncation result, both infinity and NaN
-           give result of zero.  See Table B-2 in the Alpha Architecture
-           Handbook.  */
-        ret = 0;
-        exc = float_flag_invalid;
+    /* We already handled denormals in remap_ieee_input; infinities and
+       nans are defined to return zero as per truncation.  */
+    if (exp == 0 || exp == 0x7ff)
+        return 0;
 
-        /* Without /s qualifier, both Inf and NaN trap.  SNaN always traps. */
-        if (!(quals & 0x400) || (frac & 0x4000000000000ull))
-            helper_excp(EXCP_ARITH, 0);
+    /* Restore implicit bit.  */
+    frac |= 0x10000000000000ull;
+
+    /* Note that neither overflow exceptions nor inexact exceptions
+       are desired.  This lets us streamline the checks quite a bit.  */
+    shift = exp - 1023 - 52;
+    if (shift >= 0) {
+        /* In this case the number is so large that we must shift
+           the fraction left.  There is no rounding to do.  */
+        if (shift < 63) {
+            ret = frac << shift;
+        }
     } else {
-        int32_t shift;
-
-        /* Restore implicit bit.  */
-        frac |= 0x10000000000000ull;
-
-        shift = exp - 1023 - 52;
-        if (shift > 0) {
-            /* In this case the number is so large that we must shift
-               the fraction left.  There is no rounding to do, but we
-               must still set inexact for overflow.  */
-            if (shift < 63) {
-                ret = frac << shift;
-                if ((ret >> shift) != frac)
-                    exc = float_flag_inexact;
-            } else {
-                exc = float_flag_inexact;
-                ret = 0;
-            }
-        } else if (shift == 0) {
-            /* The exponent is exactly right for the 52-bit fraction.  */
-            ret = frac;
+        uint64_t round;
+
+        /* In this case the number is smaller than the fraction as
+           represented by the 52 bit number.  Here we must think 
+           about rounding the result.  Handle this by shifting the
+           fractional part of the number into the high bits of ROUND.
+           This will let us efficiently handle round-to-nearest.  */
+        shift = -shift;
+        if (shift < 63) {
+            ret = frac >> shift;
+            round = frac << (64 - shift);
         } else {
-            uint64_t round;
-
-            /* In this case the number is smaller than the fraction as
-               represented by the 52 bit number.  Here we must think 
-               about rounding the result.  Handle this by shifting the
-               fractional part of the number into the high bits of ROUND.
-               This will let us efficiently handle round-to-nearest.  */
-            shift = -shift;
-            if (shift < 63) {
-                ret = frac >> shift;
-                round = frac << (64 - shift);
-            } else {
-            do_underflow:
-                /* The exponent is so small we shift out everything.  */
-                ret = 0;
-                round = 1;
-            }
+            /* The exponent is so small we shift out everything.
+               Leave a sticky bit for proper rounding below.  */
+            round = 1;
+        }
 
-            if (round) {
-                exc = float_flag_inexact;
-                switch (FP_STATUS.float_rounding_mode) {
-                case float_round_nearest_even:
-                    if (round == (1ull << 63)) {
-                        /* The remaining fraction is exactly 0.5;
-                           round to even.  */
-                        ret += (ret & 1);
-                    } else if (round > (1ull << 63)) {
-                        ret += 1;
-                    }
-                    break;
-                case float_round_to_zero:
-                    break;
-                case float_round_up:
-                    if (!sign)
-                        ret += 1;
-                    break;
-                case float_round_down:
-                    if (sign)
-                        ret += 1;
-                    break;
+        if (round) {
+            switch (rounding_mode) {
+            case float_round_nearest_even:
+                if (round == (1ull << 63)) {
+                    /* Remaining fraction is exactly 0.5; round to even.  */
+                    ret += (ret & 1);
+                } else if (round > (1ull << 63)) {
+                    ret += 1;
                 }
+                break;
+            case float_round_to_zero:
+                break;
+            case float_round_up:
+                if (!sign)
+                    ret += 1;
+                break;
+            case float_round_down:
+                if (sign)
+                    ret += 1;
+                break;
             }
         }
-
-        if (sign)
-            ret = -ret;
     }
 
-    if (exc)
-        float_raise(exc, &FP_STATUS);
-    end_fp(quals, token);
+    if (sign)
+        ret = -ret;
+    return ret;
+}
+
+uint64_t helper_cvttq (uint64_t a, uint32_t quals)
+{
+    uint64_t ret;
+
+    a = remap_ieee_input(quals, a);
+
+    if (quals & QUAL_V) {
+        float64 fa = t_to_float64(a);
+        uint32_t token;
+
+        token = begin_fp_exception();
+        if ((quals & QUAL_RM_MASK) == QUAL_RM_C) {
+            ret = float64_to_int64_round_to_zero(fa, &FP_STATUS);
+        } else {
+            token |= begin_fp_roundmode(quals);
+            ret = float64_to_int64(fa, &FP_STATUS);
+            end_fp_roundmode(token);
+        }
+        end_fp_exception(quals, token);
+    } else {
+        uint32_t round_mode;
+
+        switch (quals & QUAL_RM_MASK) {
+        case QUAL_RM_N:
+            round_mode = float_round_nearest_even;
+            break;
+        case QUAL_RM_C:
+        default:
+            round_mode = float_round_to_zero;
+            break;
+        case QUAL_RM_M:
+            round_mode = float_round_down;
+            break;
+        case QUAL_RM_D:
+            round_mode = FP_STATUS.float_rounding_mode;
+            break;
+        }
+
+        ret = cvttq_noqual_internal(a, round_mode);
+    }
 
     return ret;
 }
@@ -1310,7 +1371,6 @@  uint64_t helper_cvtgf (uint64_t a, uint32_t quals)
     fa = g_to_float64(a);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
     fr = float64_to_float32(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -1326,7 +1386,6 @@  uint64_t helper_cvtgq (uint64_t a, uint32_t quals)
     fa = g_to_float64(a);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
     ret = float64_to_int64(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -1352,35 +1411,24 @@  uint64_t helper_cvtlq (uint64_t a)
     return (lo & 0x3FFFFFFF) | (hi & 0xc0000000);
 }
 
-static inline uint64_t __helper_cvtql(uint64_t a, int s, int v)
-{
-    uint64_t r;
-
-    r = ((uint64_t)(a & 0xC0000000)) << 32;
-    r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29;
-
-    if (v && (int64_t)((int32_t)r) != (int64_t)r) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
-    }
-    if (s) {
-        /* TODO */
-    }
-    return r;
-}
-
 uint64_t helper_cvtql (uint64_t a)
 {
-    return __helper_cvtql(a, 0, 0);
+    return ((a & 0xC0000000) << 32) | ((a & 0x3FFFFFFF) << 29);
 }
 
 uint64_t helper_cvtqlv (uint64_t a)
 {
-    return __helper_cvtql(a, 0, 1);
+    if ((int32_t)a != (int64_t)a)
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
+    return helper_cvtql(a);
 }
 
 uint64_t helper_cvtqlsv (uint64_t a)
 {
-    return __helper_cvtql(a, 1, 1);
+    /* ??? I'm pretty sure there's nothing that /sv needs to do that /v
+       doesn't do.  The only thing I can think is that /sv is a valid
+       instruction merely for completeness in the ISA.  */
+    return helper_cvtqlv(a);
 }
 
 /* PALcode support special instructions */