diff mbox

target-alpha: squashed fpu qualifiers patch

Message ID 4B2BFD85.6070702@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Dec. 18, 2009, 10:09 p.m. UTC
This is a squashed version of the 3 or 4 incremental patches that I had 
sent out for implementing the alpha fpu instruction qualifiers.


r~
commit 572164702dd83955fc8783c85811ec86c3fb6e4a
Author: Richard Henderson <rth@twiddle.net>
Date:   Fri Dec 18 10:50:32 2009 -0800

    target-alpha: Implement fp insn qualifiers.
    
    Adds a third constant argument to the fpu helpers, which contain the
    unparsed qualifier bits.  The helper functions use new begin_fp/end_fp
    routines that extract the rounding mode from the qualifier bits, as
    well as raise exceptions for non-finite inputs and outputs also as
    directed by the qualifier bits.
    
    cpu_alpha_load/store_fpcr modified to load/store the majority of the
    bits from env->fpcr.  This because we hadn't been saving a few of the
    fpcr bits in the fp_status field: in particular DNZ.
    
    Re-implement cvttq without saturation of overflow results, to match
    the Alpha specification.
    
    Signed-off-by: Richard Henderson <rth@twiddle.net>

Comments

Aurelien Jarno Dec. 24, 2009, 3:18 p.m. UTC | #1
On Fri, Dec 18, 2009 at 02:09:09PM -0800, Richard Henderson wrote:
> This is a squashed version of the 3 or 4 incremental patches that I
> had sent out for implementing the alpha fpu instruction qualifiers.
> 
> 

First of all, this patch has a lot of coding style issues. I have
reported some of them at the beginning of the file, but stopped at some
point.

My main concern about this patch is that I don't really understand why
the current fp exceptions, the current rounding mode or flush_to_zero 
mode are stored in FP_STATUS. I think it would be better to have 
dedicated variable(s) in the cpu state structure, as it is done in other
emulated architectures. 

For example instead of saving the exception, doing a few fp
instructions, and restoring them, it is better to have a separate
variable that holds the current CPU FPU state (which probably already
exists as (part of) a CPU register), always clear the
FP_STATUS.float_exception_flags variable before an instruction or
sequence of instructions, and copy the bits that needs to be copied back
to the variable holding the CPU FPU state.

That would save a lot of the mask and shift operations that are currently
done in your patch, and also a lot of save and restore operations when
executing code.

> commit 572164702dd83955fc8783c85811ec86c3fb6e4a
> Author: Richard Henderson <rth@twiddle.net>
> Date:   Fri Dec 18 10:50:32 2009 -0800
> 
>     target-alpha: Implement fp insn qualifiers.
>     
>     Adds a third constant argument to the fpu helpers, which contain the
>     unparsed qualifier bits.  The helper functions use new begin_fp/end_fp
>     routines that extract the rounding mode from the qualifier bits, as
>     well as raise exceptions for non-finite inputs and outputs also as
>     directed by the qualifier bits.
>     
>     cpu_alpha_load/store_fpcr modified to load/store the majority of the
>     bits from env->fpcr.  This because we hadn't been saving a few of the
>     fpcr bits in the fp_status field: in particular DNZ.
>     
>     Re-implement cvttq without saturation of overflow results, to match
>     the Alpha specification.
>     
>     Signed-off-by: Richard Henderson <rth@twiddle.net>
> 
> diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
> index c0dff4b..c1c0470 100644
> --- a/target-alpha/cpu.h
> +++ b/target-alpha/cpu.h
> @@ -430,9 +430,13 @@ enum {
>  };
>  
>  /* Arithmetic exception */
> -enum {
> -    EXCP_ARITH_OVERFLOW,
> -};
> +#define EXC_M_IOV	(1<<16)		/* Integer Overflow */
> +#define EXC_M_INE	(1<<15)		/* Inexact result */
> +#define EXC_M_UNF	(1<<14)		/* Underflow */
> +#define EXC_M_FOV	(1<<13)		/* Overflow */
> +#define EXC_M_DZE	(1<<12)		/* Division by zero */
> +#define EXC_M_INV	(1<<11)		/* Invalid operation */
> +#define EXC_M_SWC	(1<<10)		/* Software completion */
>  
>  enum {
>      IR_V0   = 0,
> diff --git a/target-alpha/helper.c b/target-alpha/helper.c
> index be7d37b..94821bd 100644
> --- a/target-alpha/helper.c
> +++ b/target-alpha/helper.c
> @@ -27,41 +27,13 @@
>  
>  uint64_t cpu_alpha_load_fpcr (CPUState *env)
>  {
> -    uint64_t ret = 0;
> -    int flags, mask;
> -
> -    flags = env->fp_status.float_exception_flags;
> -    ret |= (uint64_t) flags << 52;
> -    if (flags)
> -        ret |= FPCR_SUM;
> -    env->ipr[IPR_EXC_SUM] &= ~0x3E;
> -    env->ipr[IPR_EXC_SUM] |= flags << 1;
> -
> -    mask = env->fp_status.float_exception_mask;
> -    if (mask & float_flag_invalid)
> -        ret |= FPCR_INVD;
> -    if (mask & float_flag_divbyzero)
> -        ret |= FPCR_DZED;
> -    if (mask & float_flag_overflow)
> -        ret |= FPCR_OVFD;
> -    if (mask & float_flag_underflow)
> -        ret |= FPCR_UNFD;
> -    if (mask & float_flag_inexact)
> -        ret |= FPCR_INED;
> -
> -    switch (env->fp_status.float_rounding_mode) {
> -    case float_round_nearest_even:
> -        ret |= 2ULL << FPCR_DYN_SHIFT;
> -        break;
> -    case float_round_down:
> -        ret |= 1ULL << FPCR_DYN_SHIFT;
> -        break;
> -    case float_round_up:
> -        ret |= 3ULL << FPCR_DYN_SHIFT;
> -        break;
> -    case float_round_to_zero:
> -        break;
> -    }
> +    uint64_t ret = env->fp_status.float_exception_flags;
> +
> +    if (ret)
> +      ret = FPCR_SUM | (ret << 52);

Coding style.

> +
> +    ret |= env->fpcr & ~(FPCR_SUM | FPCR_STATUS_MASK);
> +
>      return ret;
>  }
>  
> @@ -69,6 +41,8 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
>  {
>      int round_mode, mask;
>  
> +    env->fpcr = val;
> +
>      set_float_exception_flags((val >> 52) & 0x3F, &env->fp_status);
>  
>      mask = 0;
> @@ -86,6 +60,7 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
>  
>      switch ((val >> FPCR_DYN_SHIFT) & 3) {
>      case 0:
> +    default:
>          round_mode = float_round_to_zero;
>          break;
>      case 1:
> @@ -100,6 +75,11 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
>          break;
>      }
>      set_float_rounding_mode(round_mode, &env->fp_status);
> +
> +    mask = 0;
> +    if ((val & (FPCR_UNDZ|FPCR_UNFD)) == (FPCR_UNDZ|FPCR_UNFD))
> +        mask = 1;

Coding style. Also, the name of the variable "mask" is a bit misleading
for a true/false variable.

> +    set_flush_to_zero(mask, &env->fp_status);
>  }
>  
>  #if defined(CONFIG_USER_ONLY)
> diff --git a/target-alpha/helper.h b/target-alpha/helper.h
> index bedd3c0..1521a84 100644
> --- a/target-alpha/helper.h
> +++ b/target-alpha/helper.h
> @@ -41,33 +41,33 @@ DEF_HELPER_1(store_fpcr, void, i64)
>  
>  DEF_HELPER_1(f_to_memory, i32, i64)
>  DEF_HELPER_1(memory_to_f, i64, i32)
> -DEF_HELPER_2(addf, i64, i64, i64)
> -DEF_HELPER_2(subf, i64, i64, i64)
> -DEF_HELPER_2(mulf, i64, i64, i64)
> -DEF_HELPER_2(divf, i64, i64, i64)
> -DEF_HELPER_1(sqrtf, i64, i64)
> +DEF_HELPER_3(addf, i64, i64, i64, i32)
> +DEF_HELPER_3(subf, i64, i64, i64, i32)
> +DEF_HELPER_3(mulf, i64, i64, i64, i32)
> +DEF_HELPER_3(divf, i64, i64, i64, i32)
> +DEF_HELPER_2(sqrtf, i64, i64, i32)
>  
>  DEF_HELPER_1(g_to_memory, i64, i64)
>  DEF_HELPER_1(memory_to_g, i64, i64)
> -DEF_HELPER_2(addg, i64, i64, i64)
> -DEF_HELPER_2(subg, i64, i64, i64)
> -DEF_HELPER_2(mulg, i64, i64, i64)
> -DEF_HELPER_2(divg, i64, i64, i64)
> -DEF_HELPER_1(sqrtg, i64, i64)
> +DEF_HELPER_3(addg, i64, i64, i64, i32)
> +DEF_HELPER_3(subg, i64, i64, i64, i32)
> +DEF_HELPER_3(mulg, i64, i64, i64, i32)
> +DEF_HELPER_3(divg, i64, i64, i64, i32)
> +DEF_HELPER_2(sqrtg, i64, i64, i32)
>  
>  DEF_HELPER_1(s_to_memory, i32, i64)
>  DEF_HELPER_1(memory_to_s, i64, i32)
> -DEF_HELPER_2(adds, i64, i64, i64)
> -DEF_HELPER_2(subs, i64, i64, i64)
> -DEF_HELPER_2(muls, i64, i64, i64)
> -DEF_HELPER_2(divs, i64, i64, i64)
> -DEF_HELPER_1(sqrts, i64, i64)
> -
> -DEF_HELPER_2(addt, i64, i64, i64)
> -DEF_HELPER_2(subt, i64, i64, i64)
> -DEF_HELPER_2(mult, i64, i64, i64)
> -DEF_HELPER_2(divt, i64, i64, i64)
> -DEF_HELPER_1(sqrtt, i64, i64)
> +DEF_HELPER_3(adds, i64, i64, i64, i32)
> +DEF_HELPER_3(subs, i64, i64, i64, i32)
> +DEF_HELPER_3(muls, i64, i64, i64, i32)
> +DEF_HELPER_3(divs, i64, i64, i64, i32)
> +DEF_HELPER_2(sqrts, i64, i64, i32)
> +
> +DEF_HELPER_3(addt, i64, i64, i64, i32)
> +DEF_HELPER_3(subt, i64, i64, i64, i32)
> +DEF_HELPER_3(mult, i64, i64, i64, i32)
> +DEF_HELPER_3(divt, i64, i64, i64, i32)
> +DEF_HELPER_2(sqrtt, i64, i64, i32)
>  
>  DEF_HELPER_2(cmptun, i64, i64, i64)
>  DEF_HELPER_2(cmpteq, i64, i64, i64)
> @@ -81,15 +81,15 @@ DEF_HELPER_2(cpys, i64, i64, i64)
>  DEF_HELPER_2(cpysn, i64, i64, i64)
>  DEF_HELPER_2(cpyse, i64, i64, i64)
>  
> -DEF_HELPER_1(cvtts, i64, i64)
> -DEF_HELPER_1(cvtst, i64, i64)
> -DEF_HELPER_1(cvttq, i64, i64)
> -DEF_HELPER_1(cvtqs, i64, i64)
> -DEF_HELPER_1(cvtqt, i64, i64)
> -DEF_HELPER_1(cvtqf, i64, i64)
> -DEF_HELPER_1(cvtgf, i64, i64)
> -DEF_HELPER_1(cvtgq, i64, i64)
> -DEF_HELPER_1(cvtqg, i64, i64)
> +DEF_HELPER_2(cvtts, i64, i64, i32)
> +DEF_HELPER_2(cvtst, i64, i64, i32)
> +DEF_HELPER_2(cvttq, i64, i64, i32)
> +DEF_HELPER_2(cvtqs, i64, i64, i32)
> +DEF_HELPER_2(cvtqt, i64, i64, i32)
> +DEF_HELPER_2(cvtqf, i64, i64, i32)
> +DEF_HELPER_2(cvtgf, i64, i64, i32)
> +DEF_HELPER_2(cvtgq, i64, i64, i32)
> +DEF_HELPER_2(cvtqg, i64, i64, i32)
>  DEF_HELPER_1(cvtlq, i64, i64)
>  DEF_HELPER_1(cvtql, i64, i64)
>  DEF_HELPER_1(cvtqlv, i64, i64)
> diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
> index b2abf6c..2d1c3d5 100644
> --- a/target-alpha/op_helper.c
> +++ b/target-alpha/op_helper.c
> @@ -24,7 +24,7 @@
>  
>  /*****************************************************************************/
>  /* Exceptions processing helpers */
> -void helper_excp (int excp, int error)
> +void QEMU_NORETURN helper_excp (int excp, int error)
>  {
>      env->exception_index = excp;
>      env->error_code = error;
> @@ -78,7 +78,7 @@ uint64_t helper_addqv (uint64_t op1, uint64_t op2)
>      uint64_t tmp = op1;
>      op1 += op2;
>      if (unlikely((tmp ^ op2 ^ (-1ULL)) & (tmp ^ op1) & (1ULL << 63))) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return op1;
>  }
> @@ -88,7 +88,7 @@ uint64_t helper_addlv (uint64_t op1, uint64_t op2)
>      uint64_t tmp = op1;
>      op1 = (uint32_t)(op1 + op2);
>      if (unlikely((tmp ^ op2 ^ (-1UL)) & (tmp ^ op1) & (1UL << 31))) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return op1;
>  }
> @@ -98,7 +98,7 @@ uint64_t helper_subqv (uint64_t op1, uint64_t op2)
>      uint64_t res;
>      res = op1 - op2;
>      if (unlikely((op1 ^ op2) & (res ^ op1) & (1ULL << 63))) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return res;
>  }
> @@ -108,7 +108,7 @@ uint64_t helper_sublv (uint64_t op1, uint64_t op2)
>      uint32_t res;
>      res = op1 - op2;
>      if (unlikely((op1 ^ op2) & (res ^ op1) & (1UL << 31))) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return res;
>  }
> @@ -118,7 +118,7 @@ uint64_t helper_mullv (uint64_t op1, uint64_t op2)
>      int64_t res = (int64_t)op1 * (int64_t)op2;
>  
>      if (unlikely((int32_t)res != res)) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return (int64_t)((int32_t)res);
>  }
> @@ -130,7 +130,7 @@ uint64_t helper_mulqv (uint64_t op1, uint64_t op2)
>      muls64(&tl, &th, op1, op2);
>      /* If th != 0 && th != -1, then we had an overflow */
>      if (unlikely((th + 1) > 1)) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return tl;
>  }
> @@ -370,8 +370,175 @@ uint64_t helper_unpkbw (uint64_t op1)
>  
>  /* Floating point helpers */
>  
> +/* ??? Not implemented is setting EXC_MASK, containing a bitmask of
> +   destination registers of instructions that have caused arithmetic
> +   traps.  Not needed for userspace emulation, or for complete 
> +   emulation of the entire fpu stack within qemu.  But we would need
> +   it to invoke a guest kernel's entArith trap handler properly.
> +   
> +   It would be possible to encode the FP destination register in the
> +   QUAL parameter for the FPU helpers below; additional changes would
> +   be required for ADD/V et al above.  */
> +
> +#define QUAL_RM_N	0x080	/* Round mode nearest even */
> +#define QUAL_RM_C	0x000	/* Round mode chopped */
> +#define QUAL_RM_M	0x040	/* Round mode minus infinity */
> +#define QUAL_RM_D	0x0c0	/* Round mode dynamic */
> +#define QUAL_RM_MASK	0x0c0
> +
> +#define QUAL_U		0x100	/* Underflow enable (fp output) */
> +#define QUAL_V		0x100	/* Overflow enable (int output) */
> +#define QUAL_S		0x400	/* Software completion enable */
> +#define QUAL_I		0x200	/* Inexact detection enable */
> +
> +/* If the floating-point qualifiers specified a rounding mode,
> +   set that rounding mode and remember the original mode for
> +   resetting at the end of the instruction.  */
> +static inline uint32_t begin_fp_roundmode(uint32_t qual)
> +{
> +    uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = rm;
> +
> +    switch (qual & QUAL_RM_MASK) {
> +    default:
> +    case QUAL_RM_N:
> +        rm = float_round_nearest_even;
> +        break;
> +    case QUAL_RM_C:
> +        rm = float_round_to_zero;
> +        break;
> +    case QUAL_RM_M:
> +        rm = float_round_down;
> +        break;
> +    case QUAL_RM_D:
> +        return old_rm;

Does it correspond to the mode described above as yet to be implemented?

> +    }
> +    if (old_rm != rm)
> +        set_float_rounding_mode(rm, &FP_STATUS);

Coding style.

> +    return old_rm;
> +}
> +
> +/* Zero the exception flags so that we can determine if the current
> +   instruction raises any exceptions.  Save the old accrued exception
> +   status so that we can restore them at the end of the insn.  */
> +static inline uint32_t begin_fp_exception(void)
> +{
> +    uint32_t old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8;
> +    set_float_exception_flags(0, &FP_STATUS);
> +    return old_exc;
> +}
> +
> +static inline uint32_t begin_fp_flush_to_zero(uint32_t quals)
> +{
> +    /* If underflow detection is disabled, silently flush to zero.
> +       Note that flush-to-zero mode may already be enabled via the FPCR.  */
> +    if ((quals & QUAL_U) == 0 && !FP_STATUS.flush_to_zero) {
> +        set_flush_to_zero(1, &FP_STATUS);
> +        return 0x10000;

What does this constant correspond to?

> +    }
> +    return 0;
> +}
> +
> +/* Begin processing an fp operation.  Return a token that should be passed
> +   when completing the fp operation.  */
> +static uint32_t begin_fp(uint32_t quals)
> +{
> +    uint32_t ret = 0;
> +
> +    ret |= begin_fp_roundmode(quals);
> +    ret |= begin_fp_flush_to_zero(quals);
> +    ret |= begin_fp_exception();
> +
> +    return ret;
> +}
> +
> +/* End processing an fp operation.  */
> +
> +static inline void end_fp_roundmode(uint32_t orig)
> +{
> +    uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = orig & 0xff;
> +    if (unlikely(rm != old_rm))
> +        set_float_rounding_mode(old_rm, &FP_STATUS);

coding style

> +}
> +
> +static inline void end_fp_flush_to_zero(uint32_t orig)
> +{
> +    if (orig & 0x10000)

What does this constant correspond to? I guess it matches the previous
one.

> +        set_flush_to_zero(0, &FP_STATUS);

coding style

> +}
> +
> +static void end_fp_exception(uint32_t quals, uint32_t orig)
> +{
> +    uint8_t exc = FP_STATUS.float_exception_flags;
> +
> +    /* If inexact detection is disabled, silently clear it.  */
> +    if ((quals & QUAL_I) == 0)
> +        exc &= ~float_flag_inexact;

Coding style.

> +
> +    orig = (orig >> 8) & 0xff;
> +    set_float_exception_flags(exc | orig, &FP_STATUS);
> +
> +    /* Raise an exception as required.  */
> +    if (unlikely(exc)) {
> +        if (quals & QUAL_S)
> +            exc &= ~FP_STATUS.float_exception_mask;
> +        if (exc) {
> +            uint32_t hw_exc = 0;
> +
> +            if (exc & float_flag_invalid)
> +                hw_exc |= EXC_M_INV;
> +            if (exc & float_flag_divbyzero)
> +                hw_exc |= EXC_M_DZE;
> +            if (exc & float_flag_overflow)
> +                hw_exc |= EXC_M_FOV;
> +            if (exc & float_flag_underflow)
> +                hw_exc |= EXC_M_UNF;
> +            if (exc & float_flag_inexact)
> +                hw_exc |= EXC_M_INE;
> +
> +            helper_excp(EXCP_ARITH, hw_exc);
> +        }
> +    }
> +}
> +
> +static void end_fp(uint32_t quals, uint32_t orig)
> +{
> +    end_fp_roundmode(orig);
> +    end_fp_flush_to_zero(orig);
> +    end_fp_exception(quals, orig);
> +}
> +
> +static uint64_t remap_ieee_input(uint32_t quals, uint64_t a)
> +{
> +    uint64_t frac;
> +    uint32_t exp;
> +
> +    exp = (uint32_t)(a >> 52) & 0x7ff;
> +    frac = a & 0xfffffffffffffull;
> +
> +    if (exp == 0) {
> +        if (frac != 0) {
> +            /* If DNZ is set, flush denormals to zero on input.  */
> +            if (env->fpcr & FPCR_DNZ)
> +                a = a & (1ull << 63);
> +            /* If software completion not enabled, trap.  */
> +            else if ((quals & QUAL_S) == 0)
> +                helper_excp(EXCP_ARITH, EXC_M_UNF);
> +        }
> +    } else if (exp == 0x7ff) {
> +        /* Infinity or NaN.  If software completion is not enabled, trap.
> +           If /s is enabled, we'll properly signal for SNaN on output.  */
> +        /* ??? I'm not sure these exception bit flags are correct.  I do
> +           know that the Linux kernel, at least, doesn't rely on them and
> +           just emulates the insn to figure out what exception to use.  */
> +        if ((quals & QUAL_S) == 0)
> +            helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV);

Coding style.

> +    }
> +
> +    return a;
> +}
> +
>  /* F floating (VAX) */
> -static inline uint64_t float32_to_f(float32 fa)
> +static uint64_t float32_to_f(float32 fa)
>  {
>      uint64_t r, exp, mant, sig;
>      CPU_FloatU a;
> @@ -404,7 +571,7 @@ static inline uint64_t float32_to_f(float32 fa)
>      return r;
>  }
>  
> -static inline float32 f_to_float32(uint64_t a)
> +static float32 f_to_float32(uint64_t a)
>  {
>      uint32_t exp, mant_sig;
>      CPU_FloatU r;
> @@ -447,58 +614,83 @@ uint64_t helper_memory_to_f (uint32_t a)
>      return r;
>  }
>  
> -uint64_t helper_addf (uint64_t a, uint64_t b)
> +uint64_t helper_addf (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = f_to_float32(a);
>      fb = f_to_float32(b);
> +
> +    token = begin_fp(quals);
>      fr = float32_add(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_subf (uint64_t a, uint64_t b)
> +uint64_t helper_subf (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = f_to_float32(a);
>      fb = f_to_float32(b);
> +
> +    token = begin_fp(quals);
>      fr = float32_sub(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_mulf (uint64_t a, uint64_t b)
> +uint64_t helper_mulf (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = f_to_float32(a);
>      fb = f_to_float32(b);
> +
> +    token = begin_fp(quals);
>      fr = float32_mul(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_divf (uint64_t a, uint64_t b)
> +uint64_t helper_divf (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = f_to_float32(a);
>      fb = f_to_float32(b);
> +
> +    token = begin_fp(quals);
>      fr = float32_div(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_sqrtf (uint64_t t)
> +uint64_t helper_sqrtf (uint64_t t, uint32_t quals)
>  {
>      float32 ft, fr;
> +    uint32_t token;
>  
>      ft = f_to_float32(t);
> +
> +    token = begin_fp(quals);
>      fr = float32_sqrt(ft, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
>  
>  /* G floating (VAX) */
> -static inline uint64_t float64_to_g(float64 fa)
> +static uint64_t float64_to_g(float64 fa)
>  {
>      uint64_t r, exp, mant, sig;
>      CPU_DoubleU a;
> @@ -531,7 +723,7 @@ static inline uint64_t float64_to_g(float64 fa)
>      return r;
>  }
>  
> -static inline float64 g_to_float64(uint64_t a)
> +static float64 g_to_float64(uint64_t a)
>  {
>      uint64_t exp, mant_sig;
>      CPU_DoubleU r;
> @@ -574,52 +766,77 @@ uint64_t helper_memory_to_g (uint64_t a)
>      return r;
>  }
>  
> -uint64_t helper_addg (uint64_t a, uint64_t b)
> +uint64_t helper_addg (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = g_to_float64(a);
>      fb = g_to_float64(b);
> +
> +    token = begin_fp(quals);
>      fr = float64_add(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> -uint64_t helper_subg (uint64_t a, uint64_t b)
> +uint64_t helper_subg (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = g_to_float64(a);
>      fb = g_to_float64(b);
> +
> +    token = begin_fp(quals);
>      fr = float64_sub(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> -uint64_t helper_mulg (uint64_t a, uint64_t b)
> +uint64_t helper_mulg (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> -
> +    uint32_t token;
> +    
>      fa = g_to_float64(a);
>      fb = g_to_float64(b);
> +
> +    token = begin_fp(quals);
>      fr = float64_mul(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> -uint64_t helper_divg (uint64_t a, uint64_t b)
> +uint64_t helper_divg (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = g_to_float64(a);
>      fb = g_to_float64(b);
> +
> +    token = begin_fp(quals);
>      fr = float64_div(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> -uint64_t helper_sqrtg (uint64_t a)
> +uint64_t helper_sqrtg (uint64_t a, uint32_t quals)
>  {
>      float64 fa, fr;
> +    uint32_t token;
>  
>      fa = g_to_float64(a);
> +
> +    token = begin_fp(quals);
>      fr = float64_sqrt(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> @@ -627,7 +844,7 @@ uint64_t helper_sqrtg (uint64_t a)
>  /* S floating (single) */
>  
>  /* Taken from linux/arch/alpha/kernel/traps.c, s_mem_to_reg.  */
> -static inline uint64_t float32_to_s_int(uint32_t fi)
> +static uint64_t float32_to_s_int(uint32_t fi)
>  {
>      uint32_t frac = fi & 0x7fffff;
>      uint32_t sign = fi >> 31;
> @@ -649,7 +866,7 @@ static inline uint64_t float32_to_s_int(uint32_t fi)
>              | ((uint64_t)frac << 29));
>  }
>  
> -static inline uint64_t float32_to_s(float32 fa)
> +static uint64_t float32_to_s(float32 fa)
>  {
>      CPU_FloatU a;
>      a.f = fa;
> @@ -678,52 +895,77 @@ uint64_t helper_memory_to_s (uint32_t a)
>      return float32_to_s_int(a);
>  }
>  
> -uint64_t helper_adds (uint64_t a, uint64_t b)
> +static float32 input_s(uint32_t quals, uint64_t a)
> +{
> +    return s_to_float32(remap_ieee_input(quals, a));
> +}
> +
> +uint64_t helper_adds (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> -    fb = s_to_float32(b);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
> +    fb = input_s(quals, b);
>      fr = float32_add(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_subs (uint64_t a, uint64_t b)
> +uint64_t helper_subs (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> -    fb = s_to_float32(b);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
> +    fb = input_s(quals, b);
>      fr = float32_sub(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_muls (uint64_t a, uint64_t b)
> +uint64_t helper_muls (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> -    fb = s_to_float32(b);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
> +    fb = input_s(quals, b);
>      fr = float32_mul(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_divs (uint64_t a, uint64_t b)
> +uint64_t helper_divs (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> -    fb = s_to_float32(b);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
> +    fb = input_s(quals, b);
>      fr = float32_div(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_sqrts (uint64_t a)
> +uint64_t helper_sqrts (uint64_t a, uint32_t quals)
>  {
>      float32 fa, fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
>      fr = float32_sqrt(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> @@ -745,52 +987,78 @@ static inline uint64_t float64_to_t(float64 fa)
>      return r.ll;
>  }
>  
> -uint64_t helper_addt (uint64_t a, uint64_t b)
> +/* Raise any exceptions needed for using F, given the insn qualifiers.  */
> +static float64 input_t(uint32_t quals, uint64_t a)
> +{
> +    return t_to_float64(remap_ieee_input(quals, a));
> +}
> +
> +uint64_t helper_addt (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> -    fb = t_to_float64(b);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
> +    fb = input_t(quals, b);
>      fr = float64_add(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_subt (uint64_t a, uint64_t b)
> +uint64_t helper_subt (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> -    fb = t_to_float64(b);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
> +    fb = input_t(quals, b);
>      fr = float64_sub(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +    
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_mult (uint64_t a, uint64_t b)
> +uint64_t helper_mult (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> -    fb = t_to_float64(b);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
> +    fb = input_t(quals, b);
>      fr = float64_mul(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_divt (uint64_t a, uint64_t b)
> +uint64_t helper_divt (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> -    fb = t_to_float64(b);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
> +    fb = input_t(quals, b);
>      fr = float64_div(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_sqrtt (uint64_t a)
> +uint64_t helper_sqrtt (uint64_t a, uint32_t quals)
>  {
>      float64 fa, fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
>      fr = float64_sqrt(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> @@ -813,6 +1081,8 @@ uint64_t helper_cpyse(uint64_t a, uint64_t b)
>  
>  
>  /* Comparisons */
> +/* ??? Software completion qualifier missing.  */
> +
>  uint64_t helper_cmptun (uint64_t a, uint64_t b)
>  {
>      float64 fa, fb;
> @@ -905,70 +1175,218 @@ uint64_t helper_cmpglt(uint64_t a, uint64_t b)
>  }
>  
>  /* Floating point format conversion */
> -uint64_t helper_cvtts (uint64_t a)
> +uint64_t helper_cvtts (uint64_t a, uint32_t quals)
>  {
>      float64 fa;
>      float32 fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
>      fr = float64_to_float32(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_cvtst (uint64_t a)
> +uint64_t helper_cvtst (uint64_t a, uint32_t quals)
>  {
>      float32 fa;
>      float64 fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
>      fr = float32_to_float64(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_cvtqs (uint64_t a)
> +uint64_t helper_cvtqs (uint64_t a, uint32_t quals)
>  {
> -    float32 fr = int64_to_float32(a, &FP_STATUS);
> +    float32 fr;
> +    uint32_t token;
> +
> +    token = begin_fp(quals);
> +    fr = int64_to_float32(a, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_cvttq (uint64_t a)
> +/* Implement float64 to uint64 conversion without overflow enabled.
> +   In this mode we must supply the truncated result.  This behaviour
> +   is used by the compiler to get unsigned conversion for free with
> +   the same instruction.  */
> +
> +static uint64_t cvttq_internal(uint64_t a)
>  {
> -    float64 fa = t_to_float64(a);
> -    return float64_to_int64_round_to_zero(fa, &FP_STATUS);
> +    uint64_t frac, ret = 0;
> +    uint32_t exp, sign, exc = 0;
> +    int shift;
> +
> +    sign = (a >> 63);
> +    exp = (uint32_t)(a >> 52) & 0x7ff;
> +    frac = a & 0xfffffffffffffull;
> +
> +    if (exp == 0) {
> +        if (unlikely(frac != 0))
> +            goto do_underflow;
> +    } else if (exp == 0x7ff) {
> +        if (frac == 0)
> +            exc = float_flag_overflow;
> +        else
> +            exc = float_flag_invalid;
> +    } else {
> +        /* Restore implicit bit.  */
> +        frac |= 0x10000000000000ull;
> +
> +        /* Note that neither overflow exceptions nor inexact exceptions
> +           are desired.  This lets us streamline the checks quite a bit.  */
> +        shift = exp - 1023 - 52;
> +        if (shift >= 0) {
> +            /* In this case the number is so large that we must shift
> +               the fraction left.  There is no rounding to do.  */
> +            if (shift < 63) {
> +                ret = frac << shift;
> +                if ((ret >> shift) != frac)
> +                    exc = float_flag_overflow;
> +            }
> +        } else {
> +            uint64_t round;
> +
> +            /* In this case the number is smaller than the fraction as
> +               represented by the 52 bit number.  Here we must think 
> +               about rounding the result.  Handle this by shifting the
> +               fractional part of the number into the high bits of ROUND.
> +               This will let us efficiently handle round-to-nearest.  */
> +            shift = -shift;
> +            if (shift < 63) {
> +                ret = frac >> shift;
> +                round = frac << (64 - shift);
> +            } else {
> +                /* The exponent is so small we shift out everything.
> +                   Leave a sticky bit for proper rounding below.  */
> +            do_underflow:
> +                round = 1;
> +            }
> +
> +            if (round) {
> +                exc = float_flag_inexact;
> +                switch (FP_STATUS.float_rounding_mode) {
> +                case float_round_nearest_even:
> +                    if (round == (1ull << 63)) {
> +                        /* Fraction is exactly 0.5; round to even.  */
> +                        ret += (ret & 1);
> +                    } else if (round > (1ull << 63)) {
> +                        ret += 1;
> +                    }
> +                    break;
> +                case float_round_to_zero:
> +                    break;
> +                case float_round_up:
> +                    if (!sign)
> +                        ret += 1;
> +                    break;
> +                case float_round_down:
> +                    if (sign)
> +                        ret += 1;
> +                    break;
> +                }
> +            }
> +        }
> +        if (sign)
> +            ret = -ret;
> +    }
> +    if (unlikely(exc))
> +        float_raise(exc, &FP_STATUS);
> +
> +    return ret;
> +}
> +
> +uint64_t helper_cvttq (uint64_t a, uint32_t quals)
> +{
> +    uint64_t ret;
> +    uint32_t token;
> +
> +    /* ??? There's an argument to be made that when /S is enabled, we
> +       should provide the standard IEEE saturated result, instead of
> +       the truncated result that we *must* provide when /V is disabled.
> +       However, that's not how either the Tru64 or Linux completion
> +       handlers actually work, and GCC knows it.  */
> +
> +    token = begin_fp(quals);
> +    a = remap_ieee_input(quals, a);
> +    ret = cvttq_internal(a);
> +    end_fp(quals, token);
> +
> +    return ret;
>  }
>  
> -uint64_t helper_cvtqt (uint64_t a)
> +uint64_t helper_cvtqt (uint64_t a, uint32_t quals)
>  {
> -    float64 fr = int64_to_float64(a, &FP_STATUS);
> +    float64 fr;
> +    uint32_t token;
> +
> +    token = begin_fp(quals);
> +    fr = int64_to_float64(a, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_cvtqf (uint64_t a)
> +uint64_t helper_cvtqf (uint64_t a, uint32_t quals)
>  {
> -    float32 fr = int64_to_float32(a, &FP_STATUS);
> +    float32 fr;
> +    uint32_t token;
> +
> +    token = begin_fp(quals);
> +    fr = int64_to_float32(a, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_cvtgf (uint64_t a)
> +uint64_t helper_cvtgf (uint64_t a, uint32_t quals)
>  {
>      float64 fa;
>      float32 fr;
> +    uint32_t token;
>  
>      fa = g_to_float64(a);
> +
> +    token = begin_fp(quals);
>      fr = float64_to_float32(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_cvtgq (uint64_t a)
> +uint64_t helper_cvtgq (uint64_t a, uint32_t quals)
>  {
> -    float64 fa = g_to_float64(a);
> -    return float64_to_int64_round_to_zero(fa, &FP_STATUS);
> +    float64 fa;
> +    uint64_t ret;
> +    uint32_t token;
> +
> +    fa = g_to_float64(a);
> +
> +    token = begin_fp(quals);
> +    ret = float64_to_int64(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
> +    return ret;
>  }
>  
> -uint64_t helper_cvtqg (uint64_t a)
> +uint64_t helper_cvtqg (uint64_t a, uint32_t quals)
>  {
>      float64 fr;
> +    uint32_t token;
> +
> +    token = begin_fp(quals);
>      fr = int64_to_float64(a, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> @@ -979,35 +1397,24 @@ uint64_t helper_cvtlq (uint64_t a)
>      return (lo & 0x3FFFFFFF) | (hi & 0xc0000000);
>  }
>  
> -static inline uint64_t __helper_cvtql(uint64_t a, int s, int v)
> -{
> -    uint64_t r;
> -
> -    r = ((uint64_t)(a & 0xC0000000)) << 32;
> -    r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29;
> -
> -    if (v && (int64_t)((int32_t)r) != (int64_t)r) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> -    }
> -    if (s) {
> -        /* TODO */
> -    }
> -    return r;
> -}
> -
>  uint64_t helper_cvtql (uint64_t a)
>  {
> -    return __helper_cvtql(a, 0, 0);
> +    return ((a & 0xC0000000) << 32) | ((a & 0x7FFFFFFF) << 29);
>  }
>  
>  uint64_t helper_cvtqlv (uint64_t a)
>  {
> -    return __helper_cvtql(a, 0, 1);
> +    if ((int32_t)a != (int64_t)a)
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
> +    return helper_cvtql(a);
>  }
>  
>  uint64_t helper_cvtqlsv (uint64_t a)
>  {
> -    return __helper_cvtql(a, 1, 1);
> +    /* ??? I'm pretty sure there's nothing that /sv needs to do that /v
> +       doesn't do.  The only thing I can think is that /sv is a valid
> +       instruction merely for completeness in the ISA.  */
> +    return helper_cvtqlv(a);
>  }
>  
>  /* PALcode support special instructions */
> diff --git a/target-alpha/translate.c b/target-alpha/translate.c
> index 45cb697..e0ca0ed 100644
> --- a/target-alpha/translate.c
> +++ b/target-alpha/translate.c
> @@ -442,81 +442,79 @@ static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int rc)
>      gen_set_label(l1);
>  }
>  
> -#define FARITH2(name)                                       \
> -static inline void glue(gen_f, name)(int rb, int rc)        \
> -{                                                           \
> -    if (unlikely(rc == 31))                                 \
> -      return;                                               \
> -                                                            \
> -    if (rb != 31)                                           \
> -        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]);    \
> -    else {                                                  \
> -        TCGv tmp = tcg_const_i64(0);                        \
> -        gen_helper_ ## name (cpu_fir[rc], tmp);            \
> -        tcg_temp_free(tmp);                                 \
> -    }                                                       \
> +#define FARITH2(name)                                   \
> +static inline void glue(gen_f, name)(int rb, int rc)    \
> +{                                                       \
> +    if (unlikely(rc == 31))                             \
> +      return;                                           \
> +                                                        \
> +    if (rb != 31)                                       \
> +        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]); \
> +    else {                                              \
> +        TCGv tmp = tcg_const_i64(0);                    \
> +        gen_helper_ ## name (cpu_fir[rc], tmp);         \
> +        tcg_temp_free(tmp);                             \
> +    }                                                   \
>  }
> -FARITH2(sqrts)
> -FARITH2(sqrtf)
> -FARITH2(sqrtg)
> -FARITH2(sqrtt)
> -FARITH2(cvtgf)
> -FARITH2(cvtgq)
> -FARITH2(cvtqf)
> -FARITH2(cvtqg)
> -FARITH2(cvtst)
> -FARITH2(cvtts)
> -FARITH2(cvttq)
> -FARITH2(cvtqs)
> -FARITH2(cvtqt)
>  FARITH2(cvtlq)
>  FARITH2(cvtql)
>  FARITH2(cvtqlv)
>  FARITH2(cvtqlsv)
>  
> -#define FARITH3(name)                                                     \
> -static inline void glue(gen_f, name)(int ra, int rb, int rc)              \
> -{                                                                         \
> -    if (unlikely(rc == 31))                                               \
> -        return;                                                           \
> -                                                                          \
> -    if (ra != 31) {                                                       \
> -        if (rb != 31)                                                     \
> -            gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], cpu_fir[rb]);  \
> -        else {                                                            \
> -            TCGv tmp = tcg_const_i64(0);                                  \
> -            gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], tmp);          \
> -            tcg_temp_free(tmp);                                           \
> -        }                                                                 \
> -    } else {                                                              \
> -        TCGv tmp = tcg_const_i64(0);                                      \
> -        if (rb != 31)                                                     \
> -            gen_helper_ ## name (cpu_fir[rc], tmp, cpu_fir[rb]);          \
> -        else                                                              \
> -            gen_helper_ ## name (cpu_fir[rc], tmp, tmp);                   \
> -        tcg_temp_free(tmp);                                               \
> -    }                                                                     \
> +#define QFARITH2(name)                                          \
> +static inline void glue(gen_f, name)(int rb, int rc, int opc)   \
> +{                                                               \
> +    TCGv_i32 quals;                                             \
> +    if (unlikely(rc == 31))                                     \
> +      return;                                                   \
> +    quals = tcg_const_i32(opc & ~0x3f);                         \
> +    if (rb != 31)                                               \
> +        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb], quals);  \
> +    else {                                                      \
> +        TCGv tmp = tcg_const_i64(0);                            \
> +        gen_helper_ ## name (cpu_fir[rc], tmp, quals);          \
> +        tcg_temp_free(tmp);                                     \
> +    }                                                           \
> +    tcg_temp_free_i32(quals);                                   \
> +}
> +QFARITH2(sqrts)
> +QFARITH2(sqrtf)
> +QFARITH2(sqrtg)
> +QFARITH2(sqrtt)
> +QFARITH2(cvtgf)
> +QFARITH2(cvtgq)
> +QFARITH2(cvtqf)
> +QFARITH2(cvtqg)
> +QFARITH2(cvtst)
> +QFARITH2(cvtts)
> +QFARITH2(cvttq)
> +QFARITH2(cvtqs)
> +QFARITH2(cvtqt)
> +
> +#define FARITH3(name)                                           \
> +static inline void glue(gen_f, name)(int ra, int rb, int rc)    \
> +{                                                               \
> +    TCGv zero, ta, tb;                                          \
> +    if (unlikely(rc == 31))                                     \
> +        return;                                                 \
> +    ta = cpu_fir[ra];                                           \
> +    tb = cpu_fir[rb];                                           \
> +    if (unlikely(ra == 31)) {                                   \
> +        zero = tcg_const_i64(0);                                \
> +        ta = zero;                                              \
> +    }                                                           \
> +    if (unlikely(rb == 31)) {                                   \
> +        if (ra != 31)                                           \
> +            zero = tcg_const_i64(0);                            \
> +        tb = zero;                                              \
> +    }                                                           \
> +    gen_helper_ ## name (cpu_fir[rc], ta, tb);                  \
> +    if (ra == 31 || rb == 31)                                   \
> +        tcg_temp_free(zero);                                    \
>  }
> -
> -FARITH3(addf)
> -FARITH3(subf)
> -FARITH3(mulf)
> -FARITH3(divf)
> -FARITH3(addg)
> -FARITH3(subg)
> -FARITH3(mulg)
> -FARITH3(divg)
>  FARITH3(cmpgeq)
>  FARITH3(cmpglt)
>  FARITH3(cmpgle)
> -FARITH3(adds)
> -FARITH3(subs)
> -FARITH3(muls)
> -FARITH3(divs)
> -FARITH3(addt)
> -FARITH3(subt)
> -FARITH3(mult)
> -FARITH3(divt)
>  FARITH3(cmptun)
>  FARITH3(cmpteq)
>  FARITH3(cmptlt)
> @@ -525,6 +523,47 @@ FARITH3(cpys)
>  FARITH3(cpysn)
>  FARITH3(cpyse)
>  
> +#define QFARITH3(name)                                                  \
> +static inline void glue(gen_f, name)(int ra, int rb, int rc, int opc)   \
> +{                                                                       \
> +    TCGv zero, ta, tb;                                                  \
> +    TCGv_i32 quals;                                                     \
> +    if (unlikely(rc == 31))                                             \
> +        return;                                                         \
> +    ta = cpu_fir[ra];                                                   \
> +    tb = cpu_fir[rb];                                                   \
> +    if (unlikely(ra == 31)) {                                           \
> +        zero = tcg_const_i64(0);                                        \
> +        ta = zero;                                                      \
> +    }                                                                   \
> +    if (unlikely(rb == 31)) {                                           \
> +        if (ra != 31)                                                   \
> +            zero = tcg_const_i64(0);                                    \
> +        tb = zero;                                                      \
> +    }                                                                   \
> +    quals = tcg_const_i32(opc & ~0x3f);                                 \
> +    gen_helper_ ## name (cpu_fir[rc], ta, tb, quals);                   \
> +    tcg_temp_free_i32(quals);                                           \
> +    if (ra == 31 || rb == 31)                                           \
> +        tcg_temp_free(zero);                                            \
> +}
> +QFARITH3(addf)
> +QFARITH3(subf)
> +QFARITH3(mulf)
> +QFARITH3(divf)
> +QFARITH3(addg)
> +QFARITH3(subg)
> +QFARITH3(mulg)
> +QFARITH3(divg)
> +QFARITH3(adds)
> +QFARITH3(subs)
> +QFARITH3(muls)
> +QFARITH3(divs)
> +QFARITH3(addt)
> +QFARITH3(subt)
> +QFARITH3(mult)
> +QFARITH3(divt)
> +
>  static inline uint64_t zapnot_mask(uint8_t lit)
>  {
>      uint64_t mask = 0;
> @@ -1607,7 +1646,7 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
>          }
>          break;
>      case 0x14:
> -        switch (fpfn) { /* f11 & 0x3F */
> +        switch (fpfn) { /* fn11 & 0x3F */
>          case 0x04:
>              /* ITOFS */
>              if (!(ctx->amask & AMASK_FIX))
> @@ -1626,13 +1665,13 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
>              /* SQRTF */
>              if (!(ctx->amask & AMASK_FIX))
>                  goto invalid_opc;
> -            gen_fsqrtf(rb, rc);
> +            gen_fsqrtf(rb, rc, fn11);
>              break;
>          case 0x0B:
>              /* SQRTS */
>              if (!(ctx->amask & AMASK_FIX))
>                  goto invalid_opc;
> -            gen_fsqrts(rb, rc);
> +            gen_fsqrts(rb, rc, fn11);
>              break;
>          case 0x14:
>              /* ITOFF */
> @@ -1663,13 +1702,13 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
>              /* SQRTG */
>              if (!(ctx->amask & AMASK_FIX))
>                  goto invalid_opc;
> -            gen_fsqrtg(rb, rc);
> +            gen_fsqrtg(rb, rc, fn11);
>              break;
>          case 0x02B:
>              /* SQRTT */
>              if (!(ctx->amask & AMASK_FIX))
>                  goto invalid_opc;
> -            gen_fsqrtt(rb, rc);
> +            gen_fsqrtt(rb, rc, fn11);
>              break;
>          default:
>              goto invalid_opc;
> @@ -1677,47 +1716,42 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
>          break;
>      case 0x15:
>          /* VAX floating point */
> -        /* XXX: rounding mode and trap are ignored (!) */
> -        switch (fpfn) { /* f11 & 0x3F */
> +        switch (fpfn) { /* fn11 & 0x3F */
>          case 0x00:
>              /* ADDF */
> -            gen_faddf(ra, rb, rc);
> +            gen_faddf(ra, rb, rc, fn11);
>              break;
>          case 0x01:
>              /* SUBF */
> -            gen_fsubf(ra, rb, rc);
> +            gen_fsubf(ra, rb, rc, fn11);
>              break;
>          case 0x02:
>              /* MULF */
> -            gen_fmulf(ra, rb, rc);
> +            gen_fmulf(ra, rb, rc, fn11);
>              break;
>          case 0x03:
>              /* DIVF */
> -            gen_fdivf(ra, rb, rc);
> +            gen_fdivf(ra, rb, rc, fn11);
>              break;
>          case 0x1E:
>              /* CVTDG */
> -#if 0 // TODO
> -            gen_fcvtdg(rb, rc);
> -#else
> +            /* TODO */
>              goto invalid_opc;
> -#endif
> -            break;
>          case 0x20:
>              /* ADDG */
> -            gen_faddg(ra, rb, rc);
> +            gen_faddg(ra, rb, rc, fn11);
>              break;
>          case 0x21:
>              /* SUBG */
> -            gen_fsubg(ra, rb, rc);
> +            gen_fsubg(ra, rb, rc, fn11);
>              break;
>          case 0x22:
>              /* MULG */
> -            gen_fmulg(ra, rb, rc);
> +            gen_fmulg(ra, rb, rc, fn11);
>              break;
>          case 0x23:
>              /* DIVG */
> -            gen_fdivg(ra, rb, rc);
> +            gen_fdivg(ra, rb, rc, fn11);
>              break;
>          case 0x25:
>              /* CMPGEQ */
> @@ -1733,27 +1767,23 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
>              break;
>          case 0x2C:
>              /* CVTGF */
> -            gen_fcvtgf(rb, rc);
> +            gen_fcvtgf(rb, rc, fn11);
>              break;
>          case 0x2D:
>              /* CVTGD */
> -#if 0 // TODO
> -            gen_fcvtgd(rb, rc);
> -#else
> +            /* TODO */
>              goto invalid_opc;
> -#endif
> -            break;
>          case 0x2F:
>              /* CVTGQ */
> -            gen_fcvtgq(rb, rc);
> +            gen_fcvtgq(rb, rc, fn11);
>              break;
>          case 0x3C:
>              /* CVTQF */
> -            gen_fcvtqf(rb, rc);
> +            gen_fcvtqf(rb, rc, fn11);
>              break;
>          case 0x3E:
>              /* CVTQG */
> -            gen_fcvtqg(rb, rc);
> +            gen_fcvtqg(rb, rc, fn11);
>              break;
>          default:
>              goto invalid_opc;
> @@ -1761,39 +1791,38 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
>          break;
>      case 0x16:
>          /* IEEE floating-point */
> -        /* XXX: rounding mode and traps are ignored (!) */
> -        switch (fpfn) { /* f11 & 0x3F */
> +        switch (fpfn) { /* fn11 & 0x3F */
>          case 0x00:
>              /* ADDS */
> -            gen_fadds(ra, rb, rc);
> +            gen_fadds(ra, rb, rc, fn11);
>              break;
>          case 0x01:
>              /* SUBS */
> -            gen_fsubs(ra, rb, rc);
> +            gen_fsubs(ra, rb, rc, fn11);
>              break;
>          case 0x02:
>              /* MULS */
> -            gen_fmuls(ra, rb, rc);
> +            gen_fmuls(ra, rb, rc, fn11);
>              break;
>          case 0x03:
>              /* DIVS */
> -            gen_fdivs(ra, rb, rc);
> +            gen_fdivs(ra, rb, rc, fn11);
>              break;
>          case 0x20:
>              /* ADDT */
> -            gen_faddt(ra, rb, rc);
> +            gen_faddt(ra, rb, rc, fn11);
>              break;
>          case 0x21:
>              /* SUBT */
> -            gen_fsubt(ra, rb, rc);
> +            gen_fsubt(ra, rb, rc, fn11);
>              break;
>          case 0x22:
>              /* MULT */
> -            gen_fmult(ra, rb, rc);
> +            gen_fmult(ra, rb, rc, fn11);
>              break;
>          case 0x23:
>              /* DIVT */
> -            gen_fdivt(ra, rb, rc);
> +            gen_fdivt(ra, rb, rc, fn11);
>              break;
>          case 0x24:
>              /* CMPTUN */
> @@ -1812,26 +1841,25 @@ static inline int translate_one(DisasContext *ctx, uint32_t insn)
>              gen_fcmptle(ra, rb, rc);
>              break;
>          case 0x2C:
> -            /* XXX: incorrect */
>              if (fn11 == 0x2AC || fn11 == 0x6AC) {
>                  /* CVTST */
> -                gen_fcvtst(rb, rc);
> +                gen_fcvtst(rb, rc, fn11);
>              } else {
>                  /* CVTTS */
> -                gen_fcvtts(rb, rc);
> +                gen_fcvtts(rb, rc, fn11);
>              }
>              break;
>          case 0x2F:
>              /* CVTTQ */
> -            gen_fcvttq(rb, rc);
> +            gen_fcvttq(rb, rc, fn11);
>              break;
>          case 0x3C:
>              /* CVTQS */
> -            gen_fcvtqs(rb, rc);
> +            gen_fcvtqs(rb, rc, fn11);
>              break;
>          case 0x3E:
>              /* CVTQT */
> -            gen_fcvtqt(rb, rc);
> +            gen_fcvtqt(rb, rc, fn11);
>              break;
>          default:
>              goto invalid_opc;
Richard Henderson Dec. 28, 2009, 7:06 p.m. UTC | #2
On 12/24/2009 07:18 AM, Aurelien Jarno wrote:
> My main concern about this patch is that I don't really understand why
> the current fp exceptions, the current rounding mode or flush_to_zero
> mode are stored in FP_STATUS. I think it would be better to have
> dedicated variable(s) in the cpu state structure, as it is done in other
> emulated architectures.

The internal format for these features is quite different (in terms of 
bit ordering) than the native Alpha format.  If I were to be forcing the 
bits back into the Alpha format after each operation, that would be far 
more shifting and masking than only doing the conversion to Alpha format 
when the FPCR register is read or written.  At present I'm only saving a 
value and ORing it back in

However, if you're convinced I should keep things in Alpha format all 
the time, I can rearrange the patch along those lines.


r~
Aurelien Jarno Dec. 28, 2009, 7:48 p.m. UTC | #3
On Mon, Dec 28, 2009 at 11:06:20AM -0800, Richard Henderson wrote:
> On 12/24/2009 07:18 AM, Aurelien Jarno wrote:
>> My main concern about this patch is that I don't really understand why
>> the current fp exceptions, the current rounding mode or flush_to_zero
>> mode are stored in FP_STATUS. I think it would be better to have
>> dedicated variable(s) in the cpu state structure, as it is done in other
>> emulated architectures.
>
> The internal format for these features is quite different (in terms of  
> bit ordering) than the native Alpha format.  If I were to be forcing the  
> bits back into the Alpha format after each operation, that would be far  
> more shifting and masking than only doing the conversion to Alpha format  
> when the FPCR register is read or written.  At present I'm only saving a  
> value and ORing it back in
>
> However, if you're convinced I should keep things in Alpha format all  
> the time, I can rearrange the patch along those lines.
>

If the format is too different, you should still use a dedicated
variable in the cpu state. Thinking more, there is no guarantee that
FP_STATUS is not overridden by another thread (or another CPU in system
mode), if its value is set in one TB, and read back in another TB.

Also there is no need to save FP_STATUS and restore it. It should be set
to 0, and the result should be ORed with the dedicated variable.
Aurelien Jarno Dec. 28, 2009, 7:52 p.m. UTC | #4
On Mon, Dec 28, 2009 at 08:48:22PM +0100, Aurelien Jarno wrote:
> On Mon, Dec 28, 2009 at 11:06:20AM -0800, Richard Henderson wrote:
> > On 12/24/2009 07:18 AM, Aurelien Jarno wrote:
> >> My main concern about this patch is that I don't really understand why
> >> the current fp exceptions, the current rounding mode or flush_to_zero
> >> mode are stored in FP_STATUS. I think it would be better to have
> >> dedicated variable(s) in the cpu state structure, as it is done in other
> >> emulated architectures.
> >
> > The internal format for these features is quite different (in terms of  
> > bit ordering) than the native Alpha format.  If I were to be forcing the  
> > bits back into the Alpha format after each operation, that would be far  
> > more shifting and masking than only doing the conversion to Alpha format  
> > when the FPCR register is read or written.  At present I'm only saving a  
> > value and ORing it back in
> >
> > However, if you're convinced I should keep things in Alpha format all  
> > the time, I can rearrange the patch along those lines.
> >
> 
> If the format is too different, you should still use a dedicated
> variable in the cpu state. Thinking more, there is no guarantee that
> FP_STATUS is not overriden by another thread (or another CPU in system
> mode), if it's value is set in one TB, and read back in another TB.

Forget that part, there is actually one FP_STATUS per CPU, so it should
work. My grep didn't work due to the #define...

> Also there is no need to save FP_STATUS and restore it. It should be set
> to 0, and the result should be ORed with the dedicated variable.
> 
> -- 
> Aurelien Jarno	                        GPG: 1024D/F1BCDB73
> aurelien@aurel32.net                 http://www.aurel32.net
Aurelien Jarno Dec. 28, 2009, 8:10 p.m. UTC | #5
On Mon, Dec 28, 2009 at 08:52:28PM +0100, Aurelien Jarno wrote:
> On Mon, Dec 28, 2009 at 08:48:22PM +0100, Aurelien Jarno wrote:
> > On Mon, Dec 28, 2009 at 11:06:20AM -0800, Richard Henderson wrote:
> > > On 12/24/2009 07:18 AM, Aurelien Jarno wrote:
> > >> My main concern about this patch is that I don't really understand why
> > >> the current fp exceptions, the current rounding mode or flush_to_zero
> > >> mode are stored in FP_STATUS. I think it would be better to have
> > >> dedicated variable(s) in the cpu state structure, as it is done in other
> > >> emulated architectures.
> > >
> > > The internal format for these features is quite different (in terms of  
> > > bit ordering) than the native Alpha format.  If I were to be forcing the  
> > > bits back into the Alpha format after each operation, that would be far  
> > > more shifting and masking than only doing the conversion to Alpha format  
> > > when the FPCR register is read or written.  At present I'm only saving a  
> > > value and ORing it back in
> > >
> > > However, if you're convinced I should keep things in Alpha format all  
> > > the time, I can rearrange the patch along those lines.
> > >
> > 
> > If the format is too different, you should still use a dedicated
> > variable in the cpu state. Thinking more, there is no guarantee that
> > FP_STATUS is not overriden by another thread (or another CPU in system
> > mode), if it's value is set in one TB, and read back in another TB.
> 
> Forget that part, there is actually one FP_STATUS per CPU, so it should
> work. My grep didn't work due to the #define...
> 
> > Also there is no need to save FP_STATUS and restore it. It should be set
> > to 0, and the result should be ORed with the dedicated variable.
> > 

To make it clearer, what I call saving FP_STATUS is all the begin_*
functions. There should be one or more variables added into the CPU state
to hold those values.

For example begin_fp_exception can then be replaced by a simple:
  set_float_exception_flags(0, &FP_STATUS);

And then, in end_fp_exception, the mask and shift operation can be replaced
by:
  env->fp_exceptions |= FP_STATUS.float_exception_flags;

There is no need to call set_float_exception_flags(). When reading the
FPCR register, the value should be computed from env->fp_exceptions 
instead.

Similarly there is no need to save or switch back to the default 
roundmode or flush_to_zero after and FP instruction, as long as all FP
instructions set them before the actual FP code. It seems to be the case
with your patch anyway.
Richard Henderson Jan. 4, 2010, 10:46 p.m. UTC | #6
I've split up the FPCR as requested by Aurelien.  We no longer 
set anything in FP_STATUS after the execution of the operation,
only copy data from FP_STATUS to some env->fpcr field.

I have totally rewritten the patch to be more along the line 
that Laurent was suggesting, in that the rounding mode and other
qualifiers are totally parsed within the translator.  I no longer
pass the FN11 field to the helper functions.

Unlike Laurent's prototype, I do not set the rounding mode at 
every FP instruction; I remember the previous setting of the
rounding mode within a TB.  Similarly for the flush-to-zero field.

I do not handle VAX instructions at all.  The existing VAX support
is mostly broken, and I didn't feel like compounding the problem.


r~


--
Richard Henderson (6):
  target-alpha: Fix gdb access to fpcr and unique.
  target-alpha: Split up FPCR value into separate fields.
  target-alpha: Reduce internal processor registers for user-mode.
  target-alpha: Clean up arithmetic traps.
  target-alpha: Mark helper_excp as NORETURN.
  target-alpha: Implement IEEE FP qualifiers.
Richard Henderson Jan. 26, 2010, 4:35 p.m. UTC | #7
Ping?

r~

On 01/04/2010 02:46 PM, Richard Henderson wrote:
> I've split up the FPCR as requested by Aurelien.  We no longer
> set anything in FP_STATUS after the execution of the operation,
> only copy data from FP_STATUS to some env->fpcr field.
>
> I have totally rewritten the patch to be more along the line
> that Laurent was suggesting, in that the rounding mode and other
> qualifiers are totally parsed within the translator.  I no longer
> pass the FN11 field to the helper functions.
>
> Unlike Laurent's prototype, I do not set the rounding mode at
> every FP instruction; I remember the previous setting of the
> rounding mode within a TB.  Similarly for the flush-to-zero field.
>
> I do not handle VAX instructions at all.  The existing VAX support
> is mostly broken, and I didn't feel like compounding the problem.
>
>
> r~
>
>
> --
> Richard Henderson (6):
>    target-alpha: Fix gdb access to fpcr and unique.
>    target-alpha: Split up FPCR value into separate fields.
>    target-alpha: Reduce internal processor registers for user-mode.
>    target-alpha: Clean up arithmetic traps.
>    target-alpha: Mark helper_excp as NORETURN.
>    target-alpha: Implement IEEE FP qualifiers.
>
>
>
Richard Henderson Feb. 9, 2010, 6:47 p.m. UTC | #8
Ping 2.


r~


On 01/04/2010 02:46 PM, Richard Henderson wrote:
> I've split up the FPCR as requested by Aurelien.  We no longer
> set anything in FP_STATUS after the execution of the operation,
> only copy data from FP_STATUS to some env->fpcr field.
>
> I have totally rewritten the patch to be more along the line
> that Laurent was suggesting, in that the rounding mode and other
> qualifiers are totally parsed within the translator.  I no longer
> pass the FN11 field to the helper functions.
>
> Unlike Laurent's prototype, I do not set the rounding mode at
> every FP instruction; I remember the previous setting of the
> rounding mode within a TB.  Similarly for the flush-to-zero field.
>
> I do not handle VAX instructions at all.  The existing VAX support
> is mostly broken, and I didn't feel like compounding the problem.
>
>
> r~
>
>
> --
> Richard Henderson (6):
>    target-alpha: Fix gdb access to fpcr and unique.
>    target-alpha: Split up FPCR value into separate fields.
>    target-alpha: Reduce internal processor registers for user-mode.
>    target-alpha: Clean up arithmetic traps.
>    target-alpha: Mark helper_excp as NORETURN.
>    target-alpha: Implement IEEE FP qualifiers.
>
>
>
Aurelien Jarno Feb. 23, 2010, 10:58 p.m. UTC | #9
On Mon, Jan 04, 2010 at 02:46:05PM -0800, Richard Henderson wrote:
> I've split up the FPCR as requested by Aurelien.  We no longer 
> set anything in FP_STATUS after the execution of the operation,
> only copy data from FP_STATUS to some env->fpcr field.
> 
> I have totally rewritten the patch to be more along the line 
> that Laurent was suggesting, in that the rounding mode and other
> qualifiers are totally parsed within the translator.  I no longer
> pass the FN11 field to the helper functions.
> 

What's the benefit of doing that? I don't say it's wrong, I just want
to understand. Otherwise the patch looks good, so it can probably be
applied without any change.

In the meanwhile, I have applied patches 1 to 5.
Richard Henderson Feb. 24, 2010, 11:24 a.m. UTC | #10
On 02/23/2010 02:58 PM, Aurelien Jarno wrote:
>> I have totally rewritten the patch to be more along the line
>> that Laurent was suggesting, in that the rounding mode and other
>> qualifiers are totally parsed within the translator.  I no longer
>> pass the FN11 field to the helper functions.
>
> What's the benefit of doing that? I don't say it's wrong, I just want
> to understand. Otherwise the patch looks good, so it can probably be
> applied without any change.

I seem to recall Laurent opining that doing the interpretation
of the opcode in two different places was less than clean, and
in the end I agree with him.

FWIW, this configuration would also be compatible with a
future TCG enhancement to generate fp code, whereas the first
config would not.


r~
Aurelien Jarno Feb. 28, 2010, 4:49 p.m. UTC | #11
On Wed, Feb 24, 2010 at 12:24:55PM +0100, Richard Henderson wrote:
> On 02/23/2010 02:58 PM, Aurelien Jarno wrote:
> >>I have totally rewritten the patch to be more along the line
> >>that Laurent was suggesting, in that the rounding mode and other
> >>qualifiers are totally parsed within the translator.  I no longer
> >>pass the FN11 field to the helper functions.
> >
> >What's the benefit of doing that? I don't say it's wrong, I just want
> >to understand. Otherwise the patch looks good, so it can probably be
> >applied without any change.
> 
> I seem to recall Laurent opining that doing the interpretation
> of the opcode in two different places was less than clean, and
> in the end I agree with him.
> 
> FWIW, this configuration would also be compatible with a
> future TCG enhancement to generate fp code, whereas the first
> config would not.

I have applied the patch, but in order to avoid doing the same for all
targets, it might be a good idea to directly provide TCG functions to
modify FP_STATUS instead of using the interface from softfloat.h. This
would also have the advantage of clearly defining this interface, and
making sure that the alpha target is not broken by a change in softfloat.h.
diff mbox

Patch

diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index c0dff4b..c1c0470 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -430,9 +430,13 @@  enum {
 };
 
 /* Arithmetic exception */
-enum {
-    EXCP_ARITH_OVERFLOW,
-};
+#define EXC_M_IOV	(1<<16)		/* Integer Overflow */
+#define EXC_M_INE	(1<<15)		/* Inexact result */
+#define EXC_M_UNF	(1<<14)		/* Underflow */
+#define EXC_M_FOV	(1<<13)		/* Overflow */
+#define EXC_M_DZE	(1<<12)		/* Division by zero */
+#define EXC_M_INV	(1<<11)		/* Invalid operation */
+#define EXC_M_SWC	(1<<10)		/* Software completion */
 
 enum {
     IR_V0   = 0,
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index be7d37b..94821bd 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -27,41 +27,13 @@ 
 
 uint64_t cpu_alpha_load_fpcr (CPUState *env)
 {
-    uint64_t ret = 0;
-    int flags, mask;
-
-    flags = env->fp_status.float_exception_flags;
-    ret |= (uint64_t) flags << 52;
-    if (flags)
-        ret |= FPCR_SUM;
-    env->ipr[IPR_EXC_SUM] &= ~0x3E;
-    env->ipr[IPR_EXC_SUM] |= flags << 1;
-
-    mask = env->fp_status.float_exception_mask;
-    if (mask & float_flag_invalid)
-        ret |= FPCR_INVD;
-    if (mask & float_flag_divbyzero)
-        ret |= FPCR_DZED;
-    if (mask & float_flag_overflow)
-        ret |= FPCR_OVFD;
-    if (mask & float_flag_underflow)
-        ret |= FPCR_UNFD;
-    if (mask & float_flag_inexact)
-        ret |= FPCR_INED;
-
-    switch (env->fp_status.float_rounding_mode) {
-    case float_round_nearest_even:
-        ret |= 2ULL << FPCR_DYN_SHIFT;
-        break;
-    case float_round_down:
-        ret |= 1ULL << FPCR_DYN_SHIFT;
-        break;
-    case float_round_up:
-        ret |= 3ULL << FPCR_DYN_SHIFT;
-        break;
-    case float_round_to_zero:
-        break;
-    }
+    uint64_t ret = env->fp_status.float_exception_flags;
+
+    if (ret)
+      ret = FPCR_SUM | (ret << 52);
+
+    ret |= env->fpcr & ~(FPCR_SUM | FPCR_STATUS_MASK);
+
     return ret;
 }
 
@@ -69,6 +41,8 @@  void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
 {
     int round_mode, mask;
 
+    env->fpcr = val;
+
     set_float_exception_flags((val >> 52) & 0x3F, &env->fp_status);
 
     mask = 0;
@@ -86,6 +60,7 @@  void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
 
     switch ((val >> FPCR_DYN_SHIFT) & 3) {
     case 0:
+    default:
         round_mode = float_round_to_zero;
         break;
     case 1:
@@ -100,6 +75,11 @@  void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
         break;
     }
     set_float_rounding_mode(round_mode, &env->fp_status);
+
+    mask = 0;
+    if ((val & (FPCR_UNDZ|FPCR_UNFD)) == (FPCR_UNDZ|FPCR_UNFD))
+        mask = 1;
+    set_flush_to_zero(mask, &env->fp_status);
 }
 
 #if defined(CONFIG_USER_ONLY)
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index bedd3c0..1521a84 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -41,33 +41,33 @@  DEF_HELPER_1(store_fpcr, void, i64)
 
 DEF_HELPER_1(f_to_memory, i32, i64)
 DEF_HELPER_1(memory_to_f, i64, i32)
-DEF_HELPER_2(addf, i64, i64, i64)
-DEF_HELPER_2(subf, i64, i64, i64)
-DEF_HELPER_2(mulf, i64, i64, i64)
-DEF_HELPER_2(divf, i64, i64, i64)
-DEF_HELPER_1(sqrtf, i64, i64)
+DEF_HELPER_3(addf, i64, i64, i64, i32)
+DEF_HELPER_3(subf, i64, i64, i64, i32)
+DEF_HELPER_3(mulf, i64, i64, i64, i32)
+DEF_HELPER_3(divf, i64, i64, i64, i32)
+DEF_HELPER_2(sqrtf, i64, i64, i32)
 
 DEF_HELPER_1(g_to_memory, i64, i64)
 DEF_HELPER_1(memory_to_g, i64, i64)
-DEF_HELPER_2(addg, i64, i64, i64)
-DEF_HELPER_2(subg, i64, i64, i64)
-DEF_HELPER_2(mulg, i64, i64, i64)
-DEF_HELPER_2(divg, i64, i64, i64)
-DEF_HELPER_1(sqrtg, i64, i64)
+DEF_HELPER_3(addg, i64, i64, i64, i32)
+DEF_HELPER_3(subg, i64, i64, i64, i32)
+DEF_HELPER_3(mulg, i64, i64, i64, i32)
+DEF_HELPER_3(divg, i64, i64, i64, i32)
+DEF_HELPER_2(sqrtg, i64, i64, i32)
 
 DEF_HELPER_1(s_to_memory, i32, i64)
 DEF_HELPER_1(memory_to_s, i64, i32)
-DEF_HELPER_2(adds, i64, i64, i64)
-DEF_HELPER_2(subs, i64, i64, i64)
-DEF_HELPER_2(muls, i64, i64, i64)
-DEF_HELPER_2(divs, i64, i64, i64)
-DEF_HELPER_1(sqrts, i64, i64)
-
-DEF_HELPER_2(addt, i64, i64, i64)
-DEF_HELPER_2(subt, i64, i64, i64)
-DEF_HELPER_2(mult, i64, i64, i64)
-DEF_HELPER_2(divt, i64, i64, i64)
-DEF_HELPER_1(sqrtt, i64, i64)
+DEF_HELPER_3(adds, i64, i64, i64, i32)
+DEF_HELPER_3(subs, i64, i64, i64, i32)
+DEF_HELPER_3(muls, i64, i64, i64, i32)
+DEF_HELPER_3(divs, i64, i64, i64, i32)
+DEF_HELPER_2(sqrts, i64, i64, i32)
+
+DEF_HELPER_3(addt, i64, i64, i64, i32)
+DEF_HELPER_3(subt, i64, i64, i64, i32)
+DEF_HELPER_3(mult, i64, i64, i64, i32)
+DEF_HELPER_3(divt, i64, i64, i64, i32)
+DEF_HELPER_2(sqrtt, i64, i64, i32)
 
 DEF_HELPER_2(cmptun, i64, i64, i64)
 DEF_HELPER_2(cmpteq, i64, i64, i64)
@@ -81,15 +81,15 @@  DEF_HELPER_2(cpys, i64, i64, i64)
 DEF_HELPER_2(cpysn, i64, i64, i64)
 DEF_HELPER_2(cpyse, i64, i64, i64)
 
-DEF_HELPER_1(cvtts, i64, i64)
-DEF_HELPER_1(cvtst, i64, i64)
-DEF_HELPER_1(cvttq, i64, i64)
-DEF_HELPER_1(cvtqs, i64, i64)
-DEF_HELPER_1(cvtqt, i64, i64)
-DEF_HELPER_1(cvtqf, i64, i64)
-DEF_HELPER_1(cvtgf, i64, i64)
-DEF_HELPER_1(cvtgq, i64, i64)
-DEF_HELPER_1(cvtqg, i64, i64)
+DEF_HELPER_2(cvtts, i64, i64, i32)
+DEF_HELPER_2(cvtst, i64, i64, i32)
+DEF_HELPER_2(cvttq, i64, i64, i32)
+DEF_HELPER_2(cvtqs, i64, i64, i32)
+DEF_HELPER_2(cvtqt, i64, i64, i32)
+DEF_HELPER_2(cvtqf, i64, i64, i32)
+DEF_HELPER_2(cvtgf, i64, i64, i32)
+DEF_HELPER_2(cvtgq, i64, i64, i32)
+DEF_HELPER_2(cvtqg, i64, i64, i32)
 DEF_HELPER_1(cvtlq, i64, i64)
 DEF_HELPER_1(cvtql, i64, i64)
 DEF_HELPER_1(cvtqlv, i64, i64)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index b2abf6c..2d1c3d5 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -24,7 +24,7 @@ 
 
 /*****************************************************************************/
 /* Exceptions processing helpers */
-void helper_excp (int excp, int error)
+void QEMU_NORETURN helper_excp (int excp, int error)
 {
     env->exception_index = excp;
     env->error_code = error;
@@ -78,7 +78,7 @@  uint64_t helper_addqv (uint64_t op1, uint64_t op2)
     uint64_t tmp = op1;
     op1 += op2;
     if (unlikely((tmp ^ op2 ^ (-1ULL)) & (tmp ^ op1) & (1ULL << 63))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return op1;
 }
@@ -88,7 +88,7 @@  uint64_t helper_addlv (uint64_t op1, uint64_t op2)
     uint64_t tmp = op1;
     op1 = (uint32_t)(op1 + op2);
     if (unlikely((tmp ^ op2 ^ (-1UL)) & (tmp ^ op1) & (1UL << 31))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return op1;
 }
@@ -98,7 +98,7 @@  uint64_t helper_subqv (uint64_t op1, uint64_t op2)
     uint64_t res;
     res = op1 - op2;
     if (unlikely((op1 ^ op2) & (res ^ op1) & (1ULL << 63))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return res;
 }
@@ -108,7 +108,7 @@  uint64_t helper_sublv (uint64_t op1, uint64_t op2)
     uint32_t res;
     res = op1 - op2;
     if (unlikely((op1 ^ op2) & (res ^ op1) & (1UL << 31))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return res;
 }
@@ -118,7 +118,7 @@  uint64_t helper_mullv (uint64_t op1, uint64_t op2)
     int64_t res = (int64_t)op1 * (int64_t)op2;
 
     if (unlikely((int32_t)res != res)) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return (int64_t)((int32_t)res);
 }
@@ -130,7 +130,7 @@  uint64_t helper_mulqv (uint64_t op1, uint64_t op2)
     muls64(&tl, &th, op1, op2);
     /* If th != 0 && th != -1, then we had an overflow */
     if (unlikely((th + 1) > 1)) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return tl;
 }
@@ -370,8 +370,175 @@  uint64_t helper_unpkbw (uint64_t op1)
 
 /* Floating point helpers */
 
+/* ??? Not implemented is setting EXC_MASK, containing a bitmask of
+   destination registers of instructions that have caused arithmetic
+   traps.  Not needed for userspace emulation, or for complete 
+   emulation of the entire fpu stack within qemu.  But we would need
+   it to invoke a guest kernel's entArith trap handler properly.
+   
+   It would be possible to encode the FP destination register in the
+   QUAL parameter for the FPU helpers below; additional changes would
+   be required for ADD/V et al above.  */
+
+#define QUAL_RM_N	0x080	/* Round mode nearest even */
+#define QUAL_RM_C	0x000	/* Round mode chopped */
+#define QUAL_RM_M	0x040	/* Round mode minus infinity */
+#define QUAL_RM_D	0x0c0	/* Round mode dynamic */
+#define QUAL_RM_MASK	0x0c0
+
+#define QUAL_U		0x100	/* Underflow enable (fp output) */
+#define QUAL_V		0x100	/* Overflow enable (int output) */
+#define QUAL_S		0x400	/* Software completion enable */
+#define QUAL_I		0x200	/* Inexact detection enable */
+
+/* If the floating-point qualifiers specified a rounding mode,
+   set that rounding mode and remember the original mode for
+   resetting at the end of the instruction.  */
+static inline uint32_t begin_fp_roundmode(uint32_t qual)
+{
+    uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = rm;
+
+    switch (qual & QUAL_RM_MASK) {
+    default:
+    case QUAL_RM_N:
+        rm = float_round_nearest_even;
+        break;
+    case QUAL_RM_C:
+        rm = float_round_to_zero;
+        break;
+    case QUAL_RM_M:
+        rm = float_round_down;
+        break;
+    case QUAL_RM_D:
+        return old_rm;
+    }
+    if (old_rm != rm)
+        set_float_rounding_mode(rm, &FP_STATUS);
+    return old_rm;
+}
+
+/* Zero the exception flags so that we can determine if the current
+   instruction raises any exceptions.  Save the old accrued exception
+   status so that we can restore them at the end of the insn.  */
+static inline uint32_t begin_fp_exception(void)
+{
+    uint32_t old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8;
+    set_float_exception_flags(0, &FP_STATUS);
+    return old_exc;
+}
+
+static inline uint32_t begin_fp_flush_to_zero(uint32_t quals)
+{
+    /* If underflow detection is disabled, silently flush to zero.
+       Note that flush-to-zero mode may already be enabled via the FPCR.  */
+    if ((quals & QUAL_U) == 0 && !FP_STATUS.flush_to_zero) {
+        set_flush_to_zero(1, &FP_STATUS);
+        return 0x10000;
+    }
+    return 0;
+}
+
+/* Begin processing an fp operation.  Return a token that should be passed
+   when completing the fp operation.  */
+static uint32_t begin_fp(uint32_t quals)
+{
+    uint32_t ret = 0;
+
+    ret |= begin_fp_roundmode(quals);
+    ret |= begin_fp_flush_to_zero(quals);
+    ret |= begin_fp_exception();
+
+    return ret;
+}
+
+/* End processing an fp operation.  */
+
+static inline void end_fp_roundmode(uint32_t orig)
+{
+    uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = orig & 0xff;
+    if (unlikely(rm != old_rm))
+        set_float_rounding_mode(old_rm, &FP_STATUS);
+}
+
+static inline void end_fp_flush_to_zero(uint32_t orig)
+{
+    if (orig & 0x10000)
+        set_flush_to_zero(0, &FP_STATUS);
+}
+
+static void end_fp_exception(uint32_t quals, uint32_t orig)
+{
+    uint8_t exc = FP_STATUS.float_exception_flags;
+
+    /* If inexact detection is disabled, silently clear it.  */
+    if ((quals & QUAL_I) == 0)
+        exc &= ~float_flag_inexact;
+
+    orig = (orig >> 8) & 0xff;
+    set_float_exception_flags(exc | orig, &FP_STATUS);
+
+    /* Raise an exception as required.  */
+    if (unlikely(exc)) {
+        if (quals & QUAL_S)
+            exc &= ~FP_STATUS.float_exception_mask;
+        if (exc) {
+            uint32_t hw_exc = 0;
+
+            if (exc & float_flag_invalid)
+                hw_exc |= EXC_M_INV;
+            if (exc & float_flag_divbyzero)
+                hw_exc |= EXC_M_DZE;
+            if (exc & float_flag_overflow)
+                hw_exc |= EXC_M_FOV;
+            if (exc & float_flag_underflow)
+                hw_exc |= EXC_M_UNF;
+            if (exc & float_flag_inexact)
+                hw_exc |= EXC_M_INE;
+
+            helper_excp(EXCP_ARITH, hw_exc);
+        }
+    }
+}
+
+static void end_fp(uint32_t quals, uint32_t orig)
+{
+    end_fp_roundmode(orig);
+    end_fp_flush_to_zero(orig);
+    end_fp_exception(quals, orig);
+}
+
+static uint64_t remap_ieee_input(uint32_t quals, uint64_t a)
+{
+    uint64_t frac;
+    uint32_t exp;
+
+    exp = (uint32_t)(a >> 52) & 0x7ff;
+    frac = a & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (frac != 0) {
+            /* If DNZ is set, flush denormals to zero on input.  */
+            if (env->fpcr & FPCR_DNZ)
+                a = a & (1ull << 63);
+            /* If software completion not enabled, trap.  */
+            else if ((quals & QUAL_S) == 0)
+                helper_excp(EXCP_ARITH, EXC_M_UNF);
+        }
+    } else if (exp == 0x7ff) {
+        /* Infinity or NaN.  If software completion is not enabled, trap.
+           If /s is enabled, we'll properly signal for SNaN on output.  */
+        /* ??? I'm not sure these exception bit flags are correct.  I do
+           know that the Linux kernel, at least, doesn't rely on them and
+           just emulates the insn to figure out what exception to use.  */
+        if ((quals & QUAL_S) == 0)
+            helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV);
+    }
+
+    return a;
+}
+
 /* F floating (VAX) */
-static inline uint64_t float32_to_f(float32 fa)
+static uint64_t float32_to_f(float32 fa)
 {
     uint64_t r, exp, mant, sig;
     CPU_FloatU a;
@@ -404,7 +571,7 @@  static inline uint64_t float32_to_f(float32 fa)
     return r;
 }
 
-static inline float32 f_to_float32(uint64_t a)
+static float32 f_to_float32(uint64_t a)
 {
     uint32_t exp, mant_sig;
     CPU_FloatU r;
@@ -447,58 +614,83 @@  uint64_t helper_memory_to_f (uint32_t a)
     return r;
 }
 
-uint64_t helper_addf (uint64_t a, uint64_t b)
+uint64_t helper_addf (uint64_t a, uint64_t b, uint32_t quals)
 {
     float32 fa, fb, fr;
+    uint32_t token;
 
     fa = f_to_float32(a);
     fb = f_to_float32(b);
+
+    token = begin_fp(quals);
     fr = float32_add(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_f(fr);
 }
 
-uint64_t helper_subf (uint64_t a, uint64_t b)
+uint64_t helper_subf (uint64_t a, uint64_t b, uint32_t quals)
 {
     float32 fa, fb, fr;
+    uint32_t token;
 
     fa = f_to_float32(a);
     fb = f_to_float32(b);
+
+    token = begin_fp(quals);
     fr = float32_sub(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_f(fr);
 }
 
-uint64_t helper_mulf (uint64_t a, uint64_t b)
+uint64_t helper_mulf (uint64_t a, uint64_t b, uint32_t quals)
 {
     float32 fa, fb, fr;
+    uint32_t token;
 
     fa = f_to_float32(a);
     fb = f_to_float32(b);
+
+    token = begin_fp(quals);
     fr = float32_mul(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_f(fr);
 }
 
-uint64_t helper_divf (uint64_t a, uint64_t b)
+uint64_t helper_divf (uint64_t a, uint64_t b, uint32_t quals)
 {
     float32 fa, fb, fr;
+    uint32_t token;
 
     fa = f_to_float32(a);
     fb = f_to_float32(b);
+
+    token = begin_fp(quals);
     fr = float32_div(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_f(fr);
 }
 
-uint64_t helper_sqrtf (uint64_t t)
+uint64_t helper_sqrtf (uint64_t t, uint32_t quals)
 {
     float32 ft, fr;
+    uint32_t token;
 
     ft = f_to_float32(t);
+
+    token = begin_fp(quals);
     fr = float32_sqrt(ft, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_f(fr);
 }
 
 
 /* G floating (VAX) */
-static inline uint64_t float64_to_g(float64 fa)
+static uint64_t float64_to_g(float64 fa)
 {
     uint64_t r, exp, mant, sig;
     CPU_DoubleU a;
@@ -531,7 +723,7 @@  static inline uint64_t float64_to_g(float64 fa)
     return r;
 }
 
-static inline float64 g_to_float64(uint64_t a)
+static float64 g_to_float64(uint64_t a)
 {
     uint64_t exp, mant_sig;
     CPU_DoubleU r;
@@ -574,52 +766,77 @@  uint64_t helper_memory_to_g (uint64_t a)
     return r;
 }
 
-uint64_t helper_addg (uint64_t a, uint64_t b)
+uint64_t helper_addg (uint64_t a, uint64_t b, uint32_t quals)
 {
     float64 fa, fb, fr;
+    uint32_t token;
 
     fa = g_to_float64(a);
     fb = g_to_float64(b);
+
+    token = begin_fp(quals);
     fr = float64_add(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_g(fr);
 }
 
-uint64_t helper_subg (uint64_t a, uint64_t b)
+uint64_t helper_subg (uint64_t a, uint64_t b, uint32_t quals)
 {
     float64 fa, fb, fr;
+    uint32_t token;
 
     fa = g_to_float64(a);
     fb = g_to_float64(b);
+
+    token = begin_fp(quals);
     fr = float64_sub(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_g(fr);
 }
 
-uint64_t helper_mulg (uint64_t a, uint64_t b)
+uint64_t helper_mulg (uint64_t a, uint64_t b, uint32_t quals)
 {
     float64 fa, fb, fr;
-
+    uint32_t token;
+    
     fa = g_to_float64(a);
     fb = g_to_float64(b);
+
+    token = begin_fp(quals);
     fr = float64_mul(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_g(fr);
 }
 
-uint64_t helper_divg (uint64_t a, uint64_t b)
+uint64_t helper_divg (uint64_t a, uint64_t b, uint32_t quals)
 {
     float64 fa, fb, fr;
+    uint32_t token;
 
     fa = g_to_float64(a);
     fb = g_to_float64(b);
+
+    token = begin_fp(quals);
     fr = float64_div(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_g(fr);
 }
 
-uint64_t helper_sqrtg (uint64_t a)
+uint64_t helper_sqrtg (uint64_t a, uint32_t quals)
 {
     float64 fa, fr;
+    uint32_t token;
 
     fa = g_to_float64(a);
+
+    token = begin_fp(quals);
     fr = float64_sqrt(fa, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_g(fr);
 }
 
@@ -627,7 +844,7 @@  uint64_t helper_sqrtg (uint64_t a)
 /* S floating (single) */
 
 /* Taken from linux/arch/alpha/kernel/traps.c, s_mem_to_reg.  */
-static inline uint64_t float32_to_s_int(uint32_t fi)
+static uint64_t float32_to_s_int(uint32_t fi)
 {
     uint32_t frac = fi & 0x7fffff;
     uint32_t sign = fi >> 31;
@@ -649,7 +866,7 @@  static inline uint64_t float32_to_s_int(uint32_t fi)
             | ((uint64_t)frac << 29));
 }
 
-static inline uint64_t float32_to_s(float32 fa)
+static uint64_t float32_to_s(float32 fa)
 {
     CPU_FloatU a;
     a.f = fa;
@@ -678,52 +895,77 @@  uint64_t helper_memory_to_s (uint32_t a)
     return float32_to_s_int(a);
 }
 
-uint64_t helper_adds (uint64_t a, uint64_t b)
+static float32 input_s(uint32_t quals, uint64_t a)
+{
+    return s_to_float32(remap_ieee_input(quals, a));
+}
+
+uint64_t helper_adds (uint64_t a, uint64_t b, uint32_t quals)
 {
     float32 fa, fb, fr;
+    uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
+    token = begin_fp(quals);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_add(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_s(fr);
 }
 
-uint64_t helper_subs (uint64_t a, uint64_t b)
+uint64_t helper_subs (uint64_t a, uint64_t b, uint32_t quals)
 {
     float32 fa, fb, fr;
+    uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
+    token = begin_fp(quals);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_sub(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_s(fr);
 }
 
-uint64_t helper_muls (uint64_t a, uint64_t b)
+uint64_t helper_muls (uint64_t a, uint64_t b, uint32_t quals)
 {
     float32 fa, fb, fr;
+    uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
+    token = begin_fp(quals);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_mul(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_s(fr);
 }
 
-uint64_t helper_divs (uint64_t a, uint64_t b)
+uint64_t helper_divs (uint64_t a, uint64_t b, uint32_t quals)
 {
     float32 fa, fb, fr;
+    uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
+    token = begin_fp(quals);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_div(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_s(fr);
 }
 
-uint64_t helper_sqrts (uint64_t a)
+uint64_t helper_sqrts (uint64_t a, uint32_t quals)
 {
     float32 fa, fr;
+    uint32_t token;
 
-    fa = s_to_float32(a);
+    token = begin_fp(quals);
+    fa = input_s(quals, a);
     fr = float32_sqrt(fa, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_s(fr);
 }
 
@@ -745,52 +987,78 @@  static inline uint64_t float64_to_t(float64 fa)
     return r.ll;
 }
 
-uint64_t helper_addt (uint64_t a, uint64_t b)
+/* Raise any exceptions needed for using F, given the insn qualifiers.  */
+static float64 input_t(uint32_t quals, uint64_t a)
+{
+    return t_to_float64(remap_ieee_input(quals, a));
+}
+
+uint64_t helper_addt (uint64_t a, uint64_t b, uint32_t quals)
 {
     float64 fa, fb, fr;
+    uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
+    token = begin_fp(quals);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_add(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_t(fr);
 }
 
-uint64_t helper_subt (uint64_t a, uint64_t b)
+uint64_t helper_subt (uint64_t a, uint64_t b, uint32_t quals)
 {
     float64 fa, fb, fr;
+    uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
+    token = begin_fp(quals);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_sub(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+    
     return float64_to_t(fr);
 }
 
-uint64_t helper_mult (uint64_t a, uint64_t b)
+uint64_t helper_mult (uint64_t a, uint64_t b, uint32_t quals)
 {
     float64 fa, fb, fr;
+    uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
+    token = begin_fp(quals);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_mul(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_t(fr);
 }
 
-uint64_t helper_divt (uint64_t a, uint64_t b)
+uint64_t helper_divt (uint64_t a, uint64_t b, uint32_t quals)
 {
     float64 fa, fb, fr;
+    uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
+    token = begin_fp(quals);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_div(fa, fb, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_t(fr);
 }
 
-uint64_t helper_sqrtt (uint64_t a)
+uint64_t helper_sqrtt (uint64_t a, uint32_t quals)
 {
     float64 fa, fr;
+    uint32_t token;
 
-    fa = t_to_float64(a);
+    token = begin_fp(quals);
+    fa = input_t(quals, a);
     fr = float64_sqrt(fa, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_t(fr);
 }
 
@@ -813,6 +1081,8 @@  uint64_t helper_cpyse(uint64_t a, uint64_t b)
 
 
 /* Comparisons */
+/* ??? Software completion qualifier missing.  */
+
 uint64_t helper_cmptun (uint64_t a, uint64_t b)
 {
     float64 fa, fb;
@@ -905,70 +1175,218 @@  uint64_t helper_cmpglt(uint64_t a, uint64_t b)
 }
 
 /* Floating point format conversion */
-uint64_t helper_cvtts (uint64_t a)
+uint64_t helper_cvtts (uint64_t a, uint32_t quals)
 {
     float64 fa;
     float32 fr;
+    uint32_t token;
 
-    fa = t_to_float64(a);
+    token = begin_fp(quals);
+    fa = input_t(quals, a);
     fr = float64_to_float32(fa, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_s(fr);
 }
 
-uint64_t helper_cvtst (uint64_t a)
+uint64_t helper_cvtst (uint64_t a, uint32_t quals)
 {
     float32 fa;
     float64 fr;
+    uint32_t token;
 
-    fa = s_to_float32(a);
+    token = begin_fp(quals);
+    fa = input_s(quals, a);
     fr = float32_to_float64(fa, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_t(fr);
 }
 
-uint64_t helper_cvtqs (uint64_t a)
+uint64_t helper_cvtqs (uint64_t a, uint32_t quals)
 {
-    float32 fr = int64_to_float32(a, &FP_STATUS);
+    float32 fr;
+    uint32_t token;
+
+    token = begin_fp(quals);
+    fr = int64_to_float32(a, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_s(fr);
 }
 
-uint64_t helper_cvttq (uint64_t a)
+/* Implement float64 to uint64 conversion without overflow enabled.
+   In this mode we must supply the truncated result.  This behaviour
+   is used by the compiler to get unsigned conversion for free with
+   the same instruction.  */
+
+static uint64_t cvttq_internal(uint64_t a)
 {
-    float64 fa = t_to_float64(a);
-    return float64_to_int64_round_to_zero(fa, &FP_STATUS);
+    uint64_t frac, ret = 0;
+    uint32_t exp, sign, exc = 0;
+    int shift;
+
+    sign = (a >> 63);
+    exp = (uint32_t)(a >> 52) & 0x7ff;
+    frac = a & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (unlikely(frac != 0))
+            goto do_underflow;
+    } else if (exp == 0x7ff) {
+        if (frac == 0)
+            exc = float_flag_overflow;
+        else
+            exc = float_flag_invalid;
+    } else {
+        /* Restore implicit bit.  */
+        frac |= 0x10000000000000ull;
+
+        /* Note that neither overflow exceptions nor inexact exceptions
+           are desired.  This lets us streamline the checks quite a bit.  */
+        shift = exp - 1023 - 52;
+        if (shift >= 0) {
+            /* In this case the number is so large that we must shift
+               the fraction left.  There is no rounding to do.  */
+            if (shift < 63) {
+                ret = frac << shift;
+                if ((ret >> shift) != frac)
+                    exc = float_flag_overflow;
+            }
+        } else {
+            uint64_t round;
+
+            /* In this case the number is smaller than the fraction as
+               represented by the 52 bit number.  Here we must think 
+               about rounding the result.  Handle this by shifting the
+               fractional part of the number into the high bits of ROUND.
+               This will let us efficiently handle round-to-nearest.  */
+            shift = -shift;
+            if (shift < 63) {
+                ret = frac >> shift;
+                round = frac << (64 - shift);
+            } else {
+                /* The exponent is so small we shift out everything.
+                   Leave a sticky bit for proper rounding below.  */
+            do_underflow:
+                round = 1;
+            }
+
+            if (round) {
+                exc = float_flag_inexact;
+                switch (FP_STATUS.float_rounding_mode) {
+                case float_round_nearest_even:
+                    if (round == (1ull << 63)) {
+                        /* Fraction is exactly 0.5; round to even.  */
+                        ret += (ret & 1);
+                    } else if (round > (1ull << 63)) {
+                        ret += 1;
+                    }
+                    break;
+                case float_round_to_zero:
+                    break;
+                case float_round_up:
+                    if (!sign)
+                        ret += 1;
+                    break;
+                case float_round_down:
+                    if (sign)
+                        ret += 1;
+                    break;
+                }
+            }
+        }
+        if (sign)
+            ret = -ret;
+    }
+    if (unlikely(exc))
+        float_raise(exc, &FP_STATUS);
+
+    return ret;
+}
+
+uint64_t helper_cvttq (uint64_t a, uint32_t quals)
+{
+    uint64_t ret;
+    uint32_t token;
+
+    /* ??? There's an argument to be made that when /S is enabled, we
+       should provide the standard IEEE saturated result, instead of
+       the truncated result that we *must* provide when /V is disabled.
+       However, that's not how either the Tru64 or Linux completion
+       handlers actually work, and GCC knows it.  */
+
+    token = begin_fp(quals);
+    a = remap_ieee_input(quals, a);
+    ret = cvttq_internal(a);
+    end_fp(quals, token);
+
+    return ret;
 }
 
-uint64_t helper_cvtqt (uint64_t a)
+uint64_t helper_cvtqt (uint64_t a, uint32_t quals)
 {
-    float64 fr = int64_to_float64(a, &FP_STATUS);
+    float64 fr;
+    uint32_t token;
+
+    token = begin_fp(quals);
+    fr = int64_to_float64(a, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_t(fr);
 }
 
-uint64_t helper_cvtqf (uint64_t a)
+uint64_t helper_cvtqf (uint64_t a, uint32_t quals)
 {
-    float32 fr = int64_to_float32(a, &FP_STATUS);
+    float32 fr;
+    uint32_t token;
+
+    token = begin_fp(quals);
+    fr = int64_to_float32(a, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_f(fr);
 }
 
-uint64_t helper_cvtgf (uint64_t a)
+uint64_t helper_cvtgf (uint64_t a, uint32_t quals)
 {
     float64 fa;
     float32 fr;
+    uint32_t token;
 
     fa = g_to_float64(a);
+
+    token = begin_fp(quals);
     fr = float64_to_float32(fa, &FP_STATUS);
+    end_fp(quals, token);
+
     return float32_to_f(fr);
 }
 
-uint64_t helper_cvtgq (uint64_t a)
+uint64_t helper_cvtgq (uint64_t a, uint32_t quals)
 {
-    float64 fa = g_to_float64(a);
-    return float64_to_int64_round_to_zero(fa, &FP_STATUS);
+    float64 fa;
+    uint64_t ret;
+    uint32_t token;
+
+    fa = g_to_float64(a);
+
+    token = begin_fp(quals);
+    ret = float64_to_int64(fa, &FP_STATUS);
+    end_fp(quals, token);
+
+    return ret;
 }
 
-uint64_t helper_cvtqg (uint64_t a)
+uint64_t helper_cvtqg (uint64_t a, uint32_t quals)
 {
     float64 fr;
+    uint32_t token;
+
+    token = begin_fp(quals);
     fr = int64_to_float64(a, &FP_STATUS);
+    end_fp(quals, token);
+
     return float64_to_g(fr);
 }
 
@@ -979,35 +1397,24 @@  uint64_t helper_cvtlq (uint64_t a)
     return (lo & 0x3FFFFFFF) | (hi & 0xc0000000);
 }
 
-static inline uint64_t __helper_cvtql(uint64_t a, int s, int v)
-{
-    uint64_t r;
-
-    r = ((uint64_t)(a & 0xC0000000)) << 32;
-    r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29;
-
-    if (v && (int64_t)((int32_t)r) != (int64_t)r) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
-    }
-    if (s) {
-        /* TODO */
-    }
-    return r;
-}
-
 uint64_t helper_cvtql (uint64_t a)
 {
-    return __helper_cvtql(a, 0, 0);
+    return ((a & 0xC0000000) << 32) | ((a & 0x7FFFFFFF) << 29);
 }
 
 uint64_t helper_cvtqlv (uint64_t a)
 {
-    return __helper_cvtql(a, 0, 1);
+    if ((int32_t)a != (int64_t)a)
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
+    return helper_cvtql(a);
 }
 
 uint64_t helper_cvtqlsv (uint64_t a)
 {
-    return __helper_cvtql(a, 1, 1);
+    /* ??? I'm pretty sure there's nothing that /sv needs to do that /v
+       doesn't do.  The only thing I can think is that /sv is a valid
+       instruction merely for completeness in the ISA.  */
+    return helper_cvtqlv(a);
 }
 
 /* PALcode support special instructions */
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 45cb697..e0ca0ed 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -442,81 +442,79 @@  static void gen_fcmov(TCGCond inv_cond, int ra, int rb, int rc)
     gen_set_label(l1);
 }
 
-#define FARITH2(name)                                       \
-static inline void glue(gen_f, name)(int rb, int rc)        \
-{                                                           \
-    if (unlikely(rc == 31))                                 \
-      return;                                               \
-                                                            \
-    if (rb != 31)                                           \
-        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]);    \
-    else {                                                  \
-        TCGv tmp = tcg_const_i64(0);                        \
-        gen_helper_ ## name (cpu_fir[rc], tmp);            \
-        tcg_temp_free(tmp);                                 \
-    }                                                       \
+#define FARITH2(name)                                   \
+static inline void glue(gen_f, name)(int rb, int rc)    \
+{                                                       \
+    if (unlikely(rc == 31))                             \
+      return;                                           \
+                                                        \
+    if (rb != 31)                                       \
+        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]); \
+    else {                                              \
+        TCGv tmp = tcg_const_i64(0);                    \
+        gen_helper_ ## name (cpu_fir[rc], tmp);         \
+        tcg_temp_free(tmp);                             \
+    }                                                   \
 }
-FARITH2(sqrts)
-FARITH2(sqrtf)
-FARITH2(sqrtg)
-FARITH2(sqrtt)
-FARITH2(cvtgf)
-FARITH2(cvtgq)
-FARITH2(cvtqf)
-FARITH2(cvtqg)
-FARITH2(cvtst)
-FARITH2(cvtts)
-FARITH2(cvttq)
-FARITH2(cvtqs)
-FARITH2(cvtqt)
 FARITH2(cvtlq)
 FARITH2(cvtql)
 FARITH2(cvtqlv)
 FARITH2(cvtqlsv)
 
-#define FARITH3(name)                                                     \
-static inline void glue(gen_f, name)(int ra, int rb, int rc)              \
-{                                                                         \
-    if (unlikely(rc == 31))                                               \
-        return;                                                           \
-                                                                          \
-    if (ra != 31) {                                                       \
-        if (rb != 31)                                                     \
-            gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], cpu_fir[rb]);  \
-        else {                                                            \
-            TCGv tmp = tcg_const_i64(0);                                  \
-            gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], tmp);          \
-            tcg_temp_free(tmp);                                           \
-        }                                                                 \
-    } else {                                                              \
-        TCGv tmp = tcg_const_i64(0);                                      \
-        if (rb != 31)                                                     \
-            gen_helper_ ## name (cpu_fir[rc], tmp, cpu_fir[rb]);          \
-        else                                                              \
-            gen_helper_ ## name (cpu_fir[rc], tmp, tmp);                   \
-        tcg_temp_free(tmp);                                               \
-    }                                                                     \
+#define QFARITH2(name)                                          \
+static inline void glue(gen_f, name)(int rb, int rc, int opc)   \
+{                                                               \
+    TCGv_i32 quals;                                             \
+    if (unlikely(rc == 31))                                     \
+      return;                                                   \
+    quals = tcg_const_i32(opc & ~0x3f);                         \
+    if (rb != 31)                                               \
+        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb], quals);  \
+    else {                                                      \
+        TCGv tmp = tcg_const_i64(0);                            \
+        gen_helper_ ## name (cpu_fir[rc], tmp, quals);          \
+        tcg_temp_free(tmp);                                     \
+    }                                                           \
+    tcg_temp_free_i32(quals);                                   \
+}
+QFARITH2(sqrts)
+QFARITH2(sqrtf)
+QFARITH2(sqrtg)
+QFARITH2(sqrtt)
+QFARITH2(cvtgf)
+QFARITH2(cvtgq)
+QFARITH2(cvtqf)
+QFARITH2(cvtqg)
+QFARITH2(cvtst)
+QFARITH2(cvtts)
+QFARITH2(cvttq)
+QFARITH2(cvtqs)
+QFARITH2(cvtqt)
+
+#define FARITH3(name)                                           \
+static inline void glue(gen_f, name)(int ra, int rb, int rc)    \
+{                                                               \
+    TCGv zero, ta, tb;                                          \
+    if (unlikely(rc == 31))                                     \
+        return;                                                 \
+    ta = cpu_fir[ra];                                           \
+    tb = cpu_fir[rb];                                           \
+    if (unlikely(ra == 31)) {                                   \
+        zero = tcg_const_i64(0);                                \
+        ta = zero;                                              \
+    }                                                           \
+    if (unlikely(rb == 31)) {                                   \
+        if (ra != 31)                                           \
+            zero = tcg_const_i64(0);                            \
+        tb = zero;                                              \
+    }                                                           \
+    gen_helper_ ## name (cpu_fir[rc], ta, tb);                  \
+    if (ra == 31 || rb == 31)                                   \
+        tcg_temp_free(zero);                                    \
 }
-
-FARITH3(addf)
-FARITH3(subf)
-FARITH3(mulf)
-FARITH3(divf)
-FARITH3(addg)
-FARITH3(subg)
-FARITH3(mulg)
-FARITH3(divg)
 FARITH3(cmpgeq)
 FARITH3(cmpglt)
 FARITH3(cmpgle)
-FARITH3(adds)
-FARITH3(subs)
-FARITH3(muls)
-FARITH3(divs)
-FARITH3(addt)
-FARITH3(subt)
-FARITH3(mult)
-FARITH3(divt)
 FARITH3(cmptun)
 FARITH3(cmpteq)
 FARITH3(cmptlt)
@@ -525,6 +523,47 @@  FARITH3(cpys)
 FARITH3(cpysn)
 FARITH3(cpyse)
 
+#define QFARITH3(name)                                                  \
+static inline void glue(gen_f, name)(int ra, int rb, int rc, int opc)   \
+{                                                                       \
+    TCGv zero, ta, tb;                                                  \
+    TCGv_i32 quals;                                                     \
+    if (unlikely(rc == 31))                                             \
+        return;                                                         \
+    ta = cpu_fir[ra];                                                   \
+    tb = cpu_fir[rb];                                                   \
+    if (unlikely(ra == 31)) {                                           \
+        zero = tcg_const_i64(0);                                        \
+        ta = zero;                                                      \
+    }                                                                   \
+    if (unlikely(rb == 31)) {                                           \
+        if (ra != 31)                                                   \
+            zero = tcg_const_i64(0);                                    \
+        tb = zero;                                                      \
+    }                                                                   \
+    quals = tcg_const_i32(opc & ~0x3f);                                 \
+    gen_helper_ ## name (cpu_fir[rc], ta, tb, quals);                   \
+    tcg_temp_free_i32(quals);                                           \
+    if (ra == 31 || rb == 31)                                           \
+        tcg_temp_free(zero);                                            \
+}
+QFARITH3(addf)
+QFARITH3(subf)
+QFARITH3(mulf)
+QFARITH3(divf)
+QFARITH3(addg)
+QFARITH3(subg)
+QFARITH3(mulg)
+QFARITH3(divg)
+QFARITH3(adds)
+QFARITH3(subs)
+QFARITH3(muls)
+QFARITH3(divs)
+QFARITH3(addt)
+QFARITH3(subt)
+QFARITH3(mult)
+QFARITH3(divt)
+
 static inline uint64_t zapnot_mask(uint8_t lit)
 {
     uint64_t mask = 0;
@@ -1607,7 +1646,7 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
         }
         break;
     case 0x14:
-        switch (fpfn) { /* f11 & 0x3F */
+        switch (fpfn) { /* fn11 & 0x3F */
         case 0x04:
             /* ITOFS */
             if (!(ctx->amask & AMASK_FIX))
@@ -1626,13 +1665,13 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
             /* SQRTF */
             if (!(ctx->amask & AMASK_FIX))
                 goto invalid_opc;
-            gen_fsqrtf(rb, rc);
+            gen_fsqrtf(rb, rc, fn11);
             break;
         case 0x0B:
             /* SQRTS */
             if (!(ctx->amask & AMASK_FIX))
                 goto invalid_opc;
-            gen_fsqrts(rb, rc);
+            gen_fsqrts(rb, rc, fn11);
             break;
         case 0x14:
             /* ITOFF */
@@ -1663,13 +1702,13 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
             /* SQRTG */
             if (!(ctx->amask & AMASK_FIX))
                 goto invalid_opc;
-            gen_fsqrtg(rb, rc);
+            gen_fsqrtg(rb, rc, fn11);
             break;
         case 0x02B:
             /* SQRTT */
             if (!(ctx->amask & AMASK_FIX))
                 goto invalid_opc;
-            gen_fsqrtt(rb, rc);
+            gen_fsqrtt(rb, rc, fn11);
             break;
         default:
             goto invalid_opc;
@@ -1677,47 +1716,42 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
         break;
     case 0x15:
         /* VAX floating point */
-        /* XXX: rounding mode and trap are ignored (!) */
-        switch (fpfn) { /* f11 & 0x3F */
+        switch (fpfn) { /* fn11 & 0x3F */
         case 0x00:
             /* ADDF */
-            gen_faddf(ra, rb, rc);
+            gen_faddf(ra, rb, rc, fn11);
             break;
         case 0x01:
             /* SUBF */
-            gen_fsubf(ra, rb, rc);
+            gen_fsubf(ra, rb, rc, fn11);
             break;
         case 0x02:
             /* MULF */
-            gen_fmulf(ra, rb, rc);
+            gen_fmulf(ra, rb, rc, fn11);
             break;
         case 0x03:
             /* DIVF */
-            gen_fdivf(ra, rb, rc);
+            gen_fdivf(ra, rb, rc, fn11);
             break;
         case 0x1E:
             /* CVTDG */
-#if 0 // TODO
-            gen_fcvtdg(rb, rc);
-#else
+            /* TODO */
             goto invalid_opc;
-#endif
-            break;
         case 0x20:
             /* ADDG */
-            gen_faddg(ra, rb, rc);
+            gen_faddg(ra, rb, rc, fn11);
             break;
         case 0x21:
             /* SUBG */
-            gen_fsubg(ra, rb, rc);
+            gen_fsubg(ra, rb, rc, fn11);
             break;
         case 0x22:
             /* MULG */
-            gen_fmulg(ra, rb, rc);
+            gen_fmulg(ra, rb, rc, fn11);
             break;
         case 0x23:
             /* DIVG */
-            gen_fdivg(ra, rb, rc);
+            gen_fdivg(ra, rb, rc, fn11);
             break;
         case 0x25:
             /* CMPGEQ */
@@ -1733,27 +1767,23 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
             break;
         case 0x2C:
             /* CVTGF */
-            gen_fcvtgf(rb, rc);
+            gen_fcvtgf(rb, rc, fn11);
             break;
         case 0x2D:
             /* CVTGD */
-#if 0 // TODO
-            gen_fcvtgd(rb, rc);
-#else
+            /* TODO */
             goto invalid_opc;
-#endif
-            break;
         case 0x2F:
             /* CVTGQ */
-            gen_fcvtgq(rb, rc);
+            gen_fcvtgq(rb, rc, fn11);
             break;
         case 0x3C:
             /* CVTQF */
-            gen_fcvtqf(rb, rc);
+            gen_fcvtqf(rb, rc, fn11);
             break;
         case 0x3E:
             /* CVTQG */
-            gen_fcvtqg(rb, rc);
+            gen_fcvtqg(rb, rc, fn11);
             break;
         default:
             goto invalid_opc;
@@ -1761,39 +1791,38 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
         break;
     case 0x16:
         /* IEEE floating-point */
-        /* XXX: rounding mode and traps are ignored (!) */
-        switch (fpfn) { /* f11 & 0x3F */
+        switch (fpfn) { /* fn11 & 0x3F */
         case 0x00:
             /* ADDS */
-            gen_fadds(ra, rb, rc);
+            gen_fadds(ra, rb, rc, fn11);
             break;
         case 0x01:
             /* SUBS */
-            gen_fsubs(ra, rb, rc);
+            gen_fsubs(ra, rb, rc, fn11);
             break;
         case 0x02:
             /* MULS */
-            gen_fmuls(ra, rb, rc);
+            gen_fmuls(ra, rb, rc, fn11);
             break;
         case 0x03:
             /* DIVS */
-            gen_fdivs(ra, rb, rc);
+            gen_fdivs(ra, rb, rc, fn11);
             break;
         case 0x20:
             /* ADDT */
-            gen_faddt(ra, rb, rc);
+            gen_faddt(ra, rb, rc, fn11);
             break;
         case 0x21:
             /* SUBT */
-            gen_fsubt(ra, rb, rc);
+            gen_fsubt(ra, rb, rc, fn11);
             break;
         case 0x22:
             /* MULT */
-            gen_fmult(ra, rb, rc);
+            gen_fmult(ra, rb, rc, fn11);
             break;
         case 0x23:
             /* DIVT */
-            gen_fdivt(ra, rb, rc);
+            gen_fdivt(ra, rb, rc, fn11);
             break;
         case 0x24:
             /* CMPTUN */
@@ -1812,26 +1841,25 @@  static inline int translate_one(DisasContext *ctx, uint32_t insn)
             gen_fcmptle(ra, rb, rc);
             break;
         case 0x2C:
-            /* XXX: incorrect */
             if (fn11 == 0x2AC || fn11 == 0x6AC) {
                 /* CVTST */
-                gen_fcvtst(rb, rc);
+                gen_fcvtst(rb, rc, fn11);
             } else {
                 /* CVTTS */
-                gen_fcvtts(rb, rc);
+                gen_fcvtts(rb, rc, fn11);
             }
             break;
         case 0x2F:
             /* CVTTQ */
-            gen_fcvttq(rb, rc);
+            gen_fcvttq(rb, rc, fn11);
             break;
         case 0x3C:
             /* CVTQS */
-            gen_fcvtqs(rb, rc);
+            gen_fcvtqs(rb, rc, fn11);
             break;
         case 0x3E:
             /* CVTQT */
-            gen_fcvtqt(rb, rc);
+            gen_fcvtqt(rb, rc, fn11);
             break;
         default:
             goto invalid_opc;