diff mbox

[v1] target/ppc: rewrite f[n]m[add, sub] using float64_muladd

Message ID 1488381854-7275-1-git-send-email-nikunj@linux.vnet.ibm.com
State New
Headers show

Commit Message

Nikunj A Dadhania March 1, 2017, 3:24 p.m. UTC
Use the softfloat api for fused multiply-add. Also, generate VXISI using
a helper function by computing intermediate result.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>

---

v0:
* Use MADD/MSUB_FLAGS as used by VSX instructions
* Introduce helper float64_madd_set_vxisi()
---
 target/ppc/fpu_helper.c | 218 +++++++++++-------------------------------------
 1 file changed, 49 insertions(+), 169 deletions(-)

Comments

David Gibson March 2, 2017, 12:29 a.m. UTC | #1
On Wed, Mar 01, 2017 at 08:54:14PM +0530, Nikunj A Dadhania wrote:
> Use the softfloat api for fused multiply-add. Also, generate VXISI using
> a helper function by computing intermediate result.

Um.. I really need some information on why this is a good thing to
do.  Is it a bugfix?  Enhancement? Simplification?

> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> 
> ---
> 
> v0:
> * Use MADD/MSUB_FLAGS as used by VSX instructions
> * Introduce helper float64_madd_set_vxisi()
> ---
>  target/ppc/fpu_helper.c | 218 +++++++++++-------------------------------------
>  1 file changed, 49 insertions(+), 169 deletions(-)
> 
> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
> index 58aee64..ed7e84a 100644
> --- a/target/ppc/fpu_helper.c
> +++ b/target/ppc/fpu_helper.c
> @@ -743,178 +743,63 @@ uint64_t helper_frim(CPUPPCState *env, uint64_t arg)
>      return do_fri(env, arg, float_round_down);
>  }
>  
> -/* fmadd - fmadd. */
> -uint64_t helper_fmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
> -                      uint64_t arg3)
> -{
> -    CPU_DoubleU farg1, farg2, farg3;
> -
> -    farg1.ll = arg1;
> -    farg2.ll = arg2;
> -    farg3.ll = arg3;
> -
> -    if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
> -                 (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
> -        /* Multiplication of zero by infinity */
> -        farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
> -    } else {
> -        if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) ||
> -                     float64_is_signaling_nan(farg2.d, &env->fp_status) ||
> -                     float64_is_signaling_nan(farg3.d, &env->fp_status))) {
> -            /* sNaN operation */
> -            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
> -        }
> -        /* This is the way the PowerPC specification defines it */
> -        float128 ft0_128, ft1_128;
> -
> -        ft0_128 = float64_to_float128(farg1.d, &env->fp_status);
> -        ft1_128 = float64_to_float128(farg2.d, &env->fp_status);
> -        ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status);
> -        if (unlikely(float128_is_infinity(ft0_128) &&
> -                     float64_is_infinity(farg3.d) &&
> -                     float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) {
> -            /* Magnitude subtraction of infinities */
> -            farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
> -        } else {
> -            ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
> -            ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status);
> -            farg1.d = float128_to_float64(ft0_128, &env->fp_status);
> -        }
> -    }
> -
> -    return farg1.ll;
> -}
> -
> -/* fmsub - fmsub. */
> -uint64_t helper_fmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
> -                      uint64_t arg3)
> -{
> -    CPU_DoubleU farg1, farg2, farg3;
> -
> -    farg1.ll = arg1;
> -    farg2.ll = arg2;
> -    farg3.ll = arg3;
> -
> -    if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
> -                 (float64_is_zero(farg1.d) &&
> -                  float64_is_infinity(farg2.d)))) {
> -        /* Multiplication of zero by infinity */
> -        farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
> -    } else {
> -        if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) ||
> -                     float64_is_signaling_nan(farg2.d, &env->fp_status) ||
> -                     float64_is_signaling_nan(farg3.d, &env->fp_status))) {
> -            /* sNaN operation */
> -            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
> -        }
> -        /* This is the way the PowerPC specification defines it */
> -        float128 ft0_128, ft1_128;
> -
> -        ft0_128 = float64_to_float128(farg1.d, &env->fp_status);
> -        ft1_128 = float64_to_float128(farg2.d, &env->fp_status);
> -        ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status);
> -        if (unlikely(float128_is_infinity(ft0_128) &&
> -                     float64_is_infinity(farg3.d) &&
> -                     float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) {
> -            /* Magnitude subtraction of infinities */
> -            farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
> -        } else {
> -            ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
> -            ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status);
> -            farg1.d = float128_to_float64(ft0_128, &env->fp_status);
> -        }
> -    }
> -    return farg1.ll;
> -}
> +#define MADD_FLGS 0
> +#define MSUB_FLGS float_muladd_negate_c
> +#define NMADD_FLGS float_muladd_negate_result
> +#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result)
>  
> -/* fnmadd - fnmadd. */
> -uint64_t helper_fnmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
> -                       uint64_t arg3)
> +static void float64_madd_set_vxisi(CPUPPCState *env, float64 a, float64 b,
> +                                   float64 c, unsigned int flags)
>  {
> -    CPU_DoubleU farg1, farg2, farg3;
> -
> -    farg1.ll = arg1;
> -    farg2.ll = arg2;
> -    farg3.ll = arg3;
> +    float64 f = float64_mul(a, b, &env->fp_status);
>  
> -    if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
> -                 (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
> -        /* Multiplication of zero by infinity */
> -        farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
> -    } else {
> -        if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) ||
> -                     float64_is_signaling_nan(farg2.d, &env->fp_status) ||
> -                     float64_is_signaling_nan(farg3.d, &env->fp_status))) {
> -            /* sNaN operation */
> -            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
> -        }
> -        /* This is the way the PowerPC specification defines it */
> -        float128 ft0_128, ft1_128;
> -
> -        ft0_128 = float64_to_float128(farg1.d, &env->fp_status);
> -        ft1_128 = float64_to_float128(farg2.d, &env->fp_status);
> -        ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status);
> -        if (unlikely(float128_is_infinity(ft0_128) &&
> -                     float64_is_infinity(farg3.d) &&
> -                     float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) {
> -            /* Magnitude subtraction of infinities */
> -            farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
> -        } else {
> -            ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
> -            ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status);
> -            farg1.d = float128_to_float64(ft0_128, &env->fp_status);
> -        }
> -        if (likely(!float64_is_any_nan(farg1.d))) {
> -            farg1.d = float64_chs(farg1.d);
> +    /* a*b = ∞ and c = ∞, find ∞ - ∞ case and set VXISI */
> +    if (float64_is_infinity(f) && float64_is_infinity(c)) {
> +        if ((f ^ c) == 0) {
> +            /* Both negative/positive infinity and subtraction */
> +            if (flags & MSUB_FLGS) {
> +                /* 1. ∞ - ∞
> +                 * 2. (-∞) - (-∞)
> +                 */
> +                float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
> +            }
> +        } else if (!(flags & MSUB_FLGS)) {
> +            /* Opposite sign and addition
> +             * 1) ∞ + (-∞)
> +             * 2) (-∞) + ∞
> +             */
> +            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
>          }
>      }
> -    return farg1.ll;
>  }
>  
> -/* fnmsub - fnmsub. */
> -uint64_t helper_fnmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
> -                       uint64_t arg3)
> -{
> -    CPU_DoubleU farg1, farg2, farg3;
> -
> -    farg1.ll = arg1;
> -    farg2.ll = arg2;
> -    farg3.ll = arg3;
> -
> -    if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
> -                 (float64_is_zero(farg1.d) &&
> -                  float64_is_infinity(farg2.d)))) {
> -        /* Multiplication of zero by infinity */
> -        farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
> -    } else {
> -        if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) ||
> -                     float64_is_signaling_nan(farg2.d, &env->fp_status) ||
> -                     float64_is_signaling_nan(farg3.d, &env->fp_status))) {
> -            /* sNaN operation */
> -            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
> -        }
> -        /* This is the way the PowerPC specification defines it */
> -        float128 ft0_128, ft1_128;
> -
> -        ft0_128 = float64_to_float128(farg1.d, &env->fp_status);
> -        ft1_128 = float64_to_float128(farg2.d, &env->fp_status);
> -        ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status);
> -        if (unlikely(float128_is_infinity(ft0_128) &&
> -                     float64_is_infinity(farg3.d) &&
> -                     float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) {
> -            /* Magnitude subtraction of infinities */
> -            farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
> -        } else {
> -            ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
> -            ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status);
> -            farg1.d = float128_to_float64(ft0_128, &env->fp_status);
> -        }
> -        if (likely(!float64_is_any_nan(farg1.d))) {
> -            farg1.d = float64_chs(farg1.d);
> -        }
> -    }
> -    return farg1.ll;
> +#define FPU_FMADD(op, madd_flags)                                       \
> +uint64_t helper_##op(CPUPPCState *env, uint64_t arg1,                   \
> +                     uint64_t arg2, uint64_t arg3)                      \
> +{                                                                       \
> +    if (unlikely((float64_is_infinity(arg1) && float64_is_zero(arg2)) || \
> +                 (float64_is_zero(arg1) && float64_is_infinity(arg2)))) { \
> +        /* Multiplication of zero by infinity */                        \
> +        arg1 = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);     \
> +    } else {                                                            \
> +        if (unlikely(float64_is_signaling_nan(arg1, &env->fp_status) || \
> +                     float64_is_signaling_nan(arg2, &env->fp_status) || \
> +                     float64_is_signaling_nan(arg3, &env->fp_status))) { \
> +            /* sNaN operation */                                        \
> +            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);      \
> +        }                                                               \
> +                                                                        \
> +        float64_madd_set_vxisi(env, arg1, arg2, arg3, madd_flags);      \
> +        arg1 = float64_muladd(arg1, arg2, arg3, madd_flags,             \
> +                              &env->fp_status);                         \
> +        float_check_status(env);                                        \
> +    }                                                                   \
> +    return arg1;                                                        \
>  }
> +FPU_FMADD(fmadd, MADD_FLGS)
> +FPU_FMADD(fnmadd, NMADD_FLGS)
> +FPU_FMADD(fmsub, MSUB_FLGS)
> +FPU_FMADD(fnmsub, NMSUB_FLGS)
>  
>  /* frsp - frsp. */
>  uint64_t helper_frsp(CPUPPCState *env, uint64_t arg)
> @@ -2384,11 +2269,6 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                           \
>      float_check_status(env);                                                  \
>  }
>  
> -#define MADD_FLGS 0
> -#define MSUB_FLGS float_muladd_negate_c
> -#define NMADD_FLGS float_muladd_negate_result
> -#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result)
> -
>  VSX_MADD(xsmaddadp, 1, float64, VsrD(0), MADD_FLGS, 1, 1, 0)
>  VSX_MADD(xsmaddmdp, 1, float64, VsrD(0), MADD_FLGS, 0, 1, 0)
>  VSX_MADD(xsmsubadp, 1, float64, VsrD(0), MSUB_FLGS, 1, 1, 0)
Richard Henderson March 2, 2017, 2:22 a.m. UTC | #2
On 03/02/2017 11:29 AM, David Gibson wrote:
> On Wed, Mar 01, 2017 at 08:54:14PM +0530, Nikunj A Dadhania wrote:
>> Use the softfloat api for fused multiply-add. Also, generate VXISI using
>> a helper function by computing intermediate result.
>
> Um.. I really need some information on why this is a good thing to
> do.  Is it a bugfix?  Enhancement? Simplification?

Looks like a bugfix to me.  Previously we were attempting the operation via 
float128 as an intermediate type, which can result in double rounding errors.


r~
Richard Henderson March 2, 2017, 2:36 a.m. UTC | #3
On 03/02/2017 02:24 AM, Nikunj A Dadhania wrote:
> +static void float64_madd_set_vxisi(CPUPPCState *env, float64 a, float64 b,
> +                                   float64 c, unsigned int flags)
>  {
> +    float64 f = float64_mul(a, b, &env->fp_status);

What is the point of this multiply?
>
> +    /* a*b = ∞ and c = ∞, find ∞ - ∞ case and set VXISI */
> +    if (float64_is_infinity(f) && float64_is_infinity(c)) {
> +        if ((f ^ c) == 0) {
> +            /* Both negative/positive infinity and subtraction */
> +            if (flags & MSUB_FLGS) {

I would really prefer you use the float_muladd_* names.

> +uint64_t helper_##op(CPUPPCState *env, uint64_t arg1,                   \
> +                     uint64_t arg2, uint64_t arg3)                      \
> +{                                                                       \
> +    if (unlikely((float64_is_infinity(arg1) && float64_is_zero(arg2)) || \
> +                 (float64_is_zero(arg1) && float64_is_infinity(arg2)))) { \
> +        /* Multiplication of zero by infinity */                        \
> +        arg1 = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);     \
> +    } else {                                                            \
> +        if (unlikely(float64_is_signaling_nan(arg1, &env->fp_status) || \
> +                     float64_is_signaling_nan(arg2, &env->fp_status) || \
> +                     float64_is_signaling_nan(arg3, &env->fp_status))) { \
> +            /* sNaN operation */                                        \
> +            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);      \
> +        }                                                               \
> +                                                                        \
> +        float64_madd_set_vxisi(env, arg1, arg2, arg3, madd_flags);      \
> +        arg1 = float64_muladd(arg1, arg2, arg3, madd_flags,             \
> +                              &env->fp_status);                         \
> +        float_check_status(env);                                        \

I know this is the layout of the bulk of the ppc target, but it's inefficient. 
Let's do this one correctly, akin to target/tricore:

   result = float64_muladd(args...);
   flags = get_float_exception_flags(&env->fp_status);
   if (flags) {
       if (flags & float_flag_invalid) {
           // examine inputs to see why we return NaN
       }
       float_check_status(env);
   }


r~
Nikunj Dadhania March 2, 2017, 4:08 a.m. UTC | #4
On 02-Mar-2017 7:53 AM, "Richard Henderson" <rth@twiddle.net> wrote:

On 03/02/2017 11:29 AM, David Gibson wrote:

> On Wed, Mar 01, 2017 at 08:54:14PM +0530, Nikunj A Dadhania wrote:
>
>> Use the softfloat api for fused multiply-add. Also, generate VXISI using
>> a helper function by computing intermediate result.
>>
>
> Um.. I really need some information on why this is a good thing to
> do.  Is it a bugfix?  Enhancement? Simplification?
>

Looks like a bugfix to me.  Previously we were attempting the operation via
float128 as an intermediate type, which can result in double rounding
errors.

Was discussed here

https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg02000.html

Nikunj
Nikunj Dadhania March 2, 2017, 4:14 a.m. UTC | #5
On 02-Mar-2017 8:07 AM, "Richard Henderson" <rth@twiddle.net> wrote:

On 03/02/2017 02:24 AM, Nikunj A Dadhania wrote:

> +static void float64_madd_set_vxisi(CPUPPCState *env, float64 a, float64
> b,
> +                                   float64 c, unsigned int flags)
>  {
> +    float64 f = float64_mul(a, b, &env->fp_status);
>

What is the point of this multiply?


Only to compute vxisi as stated in the thread
"If the product of x and y is an Infinity and z is an Infinity of the
opposite sign, vxisi_flag is set to 1."

Let me know if there is an alternative way to achieve this.



> +    /* a*b = ∞ and c = ∞, find ∞ - ∞ case and set VXISI */
> +    if (float64_is_infinity(f) && float64_is_infinity(c)) {
> +        if ((f ^ c) == 0) {
> +            /* Both negative/positive infinity and subtraction */
> +            if (flags & MSUB_FLGS) {
>

I would really prefer you use the float_muladd_* names.


Sure.



+uint64_t helper_##op(CPUPPCState *env, uint64_t arg1,                   \
> +                     uint64_t arg2, uint64_t arg3)                      \
> +{                                                                       \
> +    if (unlikely((float64_is_infinity(arg1) && float64_is_zero(arg2)) ||
> \
> +                 (float64_is_zero(arg1) && float64_is_infinity(arg2)))) {
> \
> +        /* Multiplication of zero by infinity */                        \
> +        arg1 = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);     \
> +    } else {                                                            \
> +        if (unlikely(float64_is_signaling_nan(arg1, &env->fp_status) || \
> +                     float64_is_signaling_nan(arg2, &env->fp_status) || \
> +                     float64_is_signaling_nan(arg3, &env->fp_status))) {
> \
> +            /* sNaN operation */                                        \
> +            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);      \
> +        }                                                               \
> +                                                                        \
> +        float64_madd_set_vxisi(env, arg1, arg2, arg3, madd_flags);      \
> +        arg1 = float64_muladd(arg1, arg2, arg3, madd_flags,             \
> +                              &env->fp_status);                         \
> +        float_check_status(env);                                        \
>

I know this is the layout of the bulk of the ppc target, but it's
inefficient. Let's do this one correctly, akin to target/tricore:

  result = float64_muladd(args...);
  flags = get_float_exception_flags(&env->fp_status);
  if (flags) {
      if (flags & float_flag_invalid) {
          // examine inputs to see why we return NaN
      }
      float_check_status(env);
  }


Sure.

Nikunj
Nikunj A Dadhania March 2, 2017, 5:03 a.m. UTC | #6
Oh, some gmail issues, resending:

> On 02-Mar-2017 7:53 AM, "Richard Henderson" <rth@twiddle.net> wrote:
>
> On 03/02/2017 11:29 AM, David Gibson wrote:
>
>> On Wed, Mar 01, 2017 at 08:54:14PM +0530, Nikunj A Dadhania wrote:
>>
>>> Use the softfloat api for fused multiply-add. Also, generate VXISI using
>>> a helper function by computing intermediate result.
>>>
>>
>> Um.. I really need some information on why this is a good thing to
>> do.  Is it a bugfix?  Enhancement? Simplification?
>>
>
> Looks like a bugfix to me.  Previously we were attempting the operation via
> float128 as an intermediate type, which can result in double rounding
> errors.
>

Was discussed here

https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg02000.html

Nikunj
Nikunj A Dadhania March 2, 2017, 5:04 a.m. UTC | #7
On 02-Mar-2017 8:07 AM, "Richard Henderson" <rth@twiddle.net> wrote:
>
> On 03/02/2017 02:24 AM, Nikunj A Dadhania wrote:
>
>> +static void float64_madd_set_vxisi(CPUPPCState *env, float64 a, float64
>> b,
>> +                                   float64 c, unsigned int flags)
>>  {
>> +    float64 f = float64_mul(a, b, &env->fp_status);
>>
>
> What is the point of this multiply?
>
>
Only to compute vxisi as stated in the thread
"If the product of x and y is an Infinity and z is an Infinity of the
opposite sign, vxisi_flag is set to 1."

Let me know if there is an alternative way to achieve this.

>> +    /* a*b = ∞ and c = ∞, find ∞ - ∞ case and set VXISI */
>> +    if (float64_is_infinity(f) && float64_is_infinity(c)) {
>> +        if ((f ^ c) == 0) {
>> +            /* Both negative/positive infinity and subtraction */
>> +            if (flags & MSUB_FLGS) {
>>
>
> I would really prefer you use the float_muladd_* names.

Sure.

> +uint64_t helper_##op(CPUPPCState *env, uint64_t arg1,                   \
>> +                     uint64_t arg2, uint64_t arg3)                      \
>> +{                                                                       \
>> +    if (unlikely((float64_is_infinity(arg1) && float64_is_zero(arg2)) ||
>> \
>> +                 (float64_is_zero(arg1) && float64_is_infinity(arg2)))) {
>> \
>> +        /* Multiplication of zero by infinity */                        \
>> +        arg1 = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);     \
>> +    } else {                                                            \
>> +        if (unlikely(float64_is_signaling_nan(arg1, &env->fp_status) || \
>> +                     float64_is_signaling_nan(arg2, &env->fp_status) || \
>> +                     float64_is_signaling_nan(arg3, &env->fp_status))) {
>> \
>> +            /* sNaN operation */                                        \
>> +            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);      \
>> +        }                                                               \
>> +                                                                        \
>> +        float64_madd_set_vxisi(env, arg1, arg2, arg3, madd_flags);      \
>> +        arg1 = float64_muladd(arg1, arg2, arg3, madd_flags,             \
>> +                              &env->fp_status);                         \
>> +        float_check_status(env);                                        \
>>
>
> I know this is the layout of the bulk of the ppc target, but it's
> inefficient. Let's do this one correctly, akin to target/tricore:
>
>   result = float64_muladd(args...);
>   flags = get_float_exception_flags(&env->fp_status);
>   if (flags) {
>       if (flags & float_flag_invalid) {
>           // examine inputs to see why we return NaN
>       }
>       float_check_status(env);
>   }

Sure.

Nikunj
diff mbox

Patch

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index 58aee64..ed7e84a 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -743,178 +743,63 @@  uint64_t helper_frim(CPUPPCState *env, uint64_t arg)
     return do_fri(env, arg, float_round_down);
 }
 
-/* fmadd - fmadd. */
-uint64_t helper_fmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
-                      uint64_t arg3)
-{
-    CPU_DoubleU farg1, farg2, farg3;
-
-    farg1.ll = arg1;
-    farg2.ll = arg2;
-    farg3.ll = arg3;
-
-    if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
-                 (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
-        /* Multiplication of zero by infinity */
-        farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
-    } else {
-        if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) ||
-                     float64_is_signaling_nan(farg2.d, &env->fp_status) ||
-                     float64_is_signaling_nan(farg3.d, &env->fp_status))) {
-            /* sNaN operation */
-            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
-        }
-        /* This is the way the PowerPC specification defines it */
-        float128 ft0_128, ft1_128;
-
-        ft0_128 = float64_to_float128(farg1.d, &env->fp_status);
-        ft1_128 = float64_to_float128(farg2.d, &env->fp_status);
-        ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status);
-        if (unlikely(float128_is_infinity(ft0_128) &&
-                     float64_is_infinity(farg3.d) &&
-                     float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) {
-            /* Magnitude subtraction of infinities */
-            farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
-        } else {
-            ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
-            ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status);
-            farg1.d = float128_to_float64(ft0_128, &env->fp_status);
-        }
-    }
-
-    return farg1.ll;
-}
-
-/* fmsub - fmsub. */
-uint64_t helper_fmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
-                      uint64_t arg3)
-{
-    CPU_DoubleU farg1, farg2, farg3;
-
-    farg1.ll = arg1;
-    farg2.ll = arg2;
-    farg3.ll = arg3;
-
-    if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
-                 (float64_is_zero(farg1.d) &&
-                  float64_is_infinity(farg2.d)))) {
-        /* Multiplication of zero by infinity */
-        farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
-    } else {
-        if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) ||
-                     float64_is_signaling_nan(farg2.d, &env->fp_status) ||
-                     float64_is_signaling_nan(farg3.d, &env->fp_status))) {
-            /* sNaN operation */
-            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
-        }
-        /* This is the way the PowerPC specification defines it */
-        float128 ft0_128, ft1_128;
-
-        ft0_128 = float64_to_float128(farg1.d, &env->fp_status);
-        ft1_128 = float64_to_float128(farg2.d, &env->fp_status);
-        ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status);
-        if (unlikely(float128_is_infinity(ft0_128) &&
-                     float64_is_infinity(farg3.d) &&
-                     float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) {
-            /* Magnitude subtraction of infinities */
-            farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
-        } else {
-            ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
-            ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status);
-            farg1.d = float128_to_float64(ft0_128, &env->fp_status);
-        }
-    }
-    return farg1.ll;
-}
+#define MADD_FLGS 0
+#define MSUB_FLGS float_muladd_negate_c
+#define NMADD_FLGS float_muladd_negate_result
+#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result)
 
-/* fnmadd - fnmadd. */
-uint64_t helper_fnmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
-                       uint64_t arg3)
+static void float64_madd_set_vxisi(CPUPPCState *env, float64 a, float64 b,
+                                   float64 c, unsigned int flags)
 {
-    CPU_DoubleU farg1, farg2, farg3;
-
-    farg1.ll = arg1;
-    farg2.ll = arg2;
-    farg3.ll = arg3;
+    float64 f = float64_mul(a, b, &env->fp_status);
 
-    if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
-                 (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
-        /* Multiplication of zero by infinity */
-        farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
-    } else {
-        if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) ||
-                     float64_is_signaling_nan(farg2.d, &env->fp_status) ||
-                     float64_is_signaling_nan(farg3.d, &env->fp_status))) {
-            /* sNaN operation */
-            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
-        }
-        /* This is the way the PowerPC specification defines it */
-        float128 ft0_128, ft1_128;
-
-        ft0_128 = float64_to_float128(farg1.d, &env->fp_status);
-        ft1_128 = float64_to_float128(farg2.d, &env->fp_status);
-        ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status);
-        if (unlikely(float128_is_infinity(ft0_128) &&
-                     float64_is_infinity(farg3.d) &&
-                     float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) {
-            /* Magnitude subtraction of infinities */
-            farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
-        } else {
-            ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
-            ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status);
-            farg1.d = float128_to_float64(ft0_128, &env->fp_status);
-        }
-        if (likely(!float64_is_any_nan(farg1.d))) {
-            farg1.d = float64_chs(farg1.d);
+    /* a*b = ∞ and c = ∞, find ∞ - ∞ case and set VXISI */
+    if (float64_is_infinity(f) && float64_is_infinity(c)) {
+        if ((f ^ c) == 0) {
+            /* Both negative/positive infinity and subtraction */
+            if (flags & MSUB_FLGS) {
+                /* 1. ∞ - ∞
+                 * 2. (-∞) - (-∞)
+                 */
+                float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
+            }
+        } else if (!(flags & MSUB_FLGS)) {
+            /* Opposite sign and addition
+             * 1) ∞ + (-∞)
+             * 2) (-∞) + ∞
+             */
+            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
         }
     }
-    return farg1.ll;
 }
 
-/* fnmsub - fnmsub. */
-uint64_t helper_fnmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
-                       uint64_t arg3)
-{
-    CPU_DoubleU farg1, farg2, farg3;
-
-    farg1.ll = arg1;
-    farg2.ll = arg2;
-    farg3.ll = arg3;
-
-    if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
-                 (float64_is_zero(farg1.d) &&
-                  float64_is_infinity(farg2.d)))) {
-        /* Multiplication of zero by infinity */
-        farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);
-    } else {
-        if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) ||
-                     float64_is_signaling_nan(farg2.d, &env->fp_status) ||
-                     float64_is_signaling_nan(farg3.d, &env->fp_status))) {
-            /* sNaN operation */
-            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
-        }
-        /* This is the way the PowerPC specification defines it */
-        float128 ft0_128, ft1_128;
-
-        ft0_128 = float64_to_float128(farg1.d, &env->fp_status);
-        ft1_128 = float64_to_float128(farg2.d, &env->fp_status);
-        ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status);
-        if (unlikely(float128_is_infinity(ft0_128) &&
-                     float64_is_infinity(farg3.d) &&
-                     float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) {
-            /* Magnitude subtraction of infinities */
-            farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1);
-        } else {
-            ft1_128 = float64_to_float128(farg3.d, &env->fp_status);
-            ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status);
-            farg1.d = float128_to_float64(ft0_128, &env->fp_status);
-        }
-        if (likely(!float64_is_any_nan(farg1.d))) {
-            farg1.d = float64_chs(farg1.d);
-        }
-    }
-    return farg1.ll;
+#define FPU_FMADD(op, madd_flags)                                       \
+uint64_t helper_##op(CPUPPCState *env, uint64_t arg1,                   \
+                     uint64_t arg2, uint64_t arg3)                      \
+{                                                                       \
+    if (unlikely((float64_is_infinity(arg1) && float64_is_zero(arg2)) || \
+                 (float64_is_zero(arg1) && float64_is_infinity(arg2)))) { \
+        /* Multiplication of zero by infinity */                        \
+        arg1 = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1);     \
+    } else {                                                            \
+        if (unlikely(float64_is_signaling_nan(arg1, &env->fp_status) || \
+                     float64_is_signaling_nan(arg2, &env->fp_status) || \
+                     float64_is_signaling_nan(arg3, &env->fp_status))) { \
+            /* sNaN operation */                                        \
+            float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);      \
+        }                                                               \
+                                                                        \
+        float64_madd_set_vxisi(env, arg1, arg2, arg3, madd_flags);      \
+        arg1 = float64_muladd(arg1, arg2, arg3, madd_flags,             \
+                              &env->fp_status);                         \
+        float_check_status(env);                                        \
+    }                                                                   \
+    return arg1;                                                        \
 }
+FPU_FMADD(fmadd, MADD_FLGS)
+FPU_FMADD(fnmadd, NMADD_FLGS)
+FPU_FMADD(fmsub, MSUB_FLGS)
+FPU_FMADD(fnmsub, NMSUB_FLGS)
 
 /* frsp - frsp. */
 uint64_t helper_frsp(CPUPPCState *env, uint64_t arg)
@@ -2384,11 +2269,6 @@  void helper_##op(CPUPPCState *env, uint32_t opcode)                           \
     float_check_status(env);                                                  \
 }
 
-#define MADD_FLGS 0
-#define MSUB_FLGS float_muladd_negate_c
-#define NMADD_FLGS float_muladd_negate_result
-#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result)
-
 VSX_MADD(xsmaddadp, 1, float64, VsrD(0), MADD_FLGS, 1, 1, 0)
 VSX_MADD(xsmaddmdp, 1, float64, VsrD(0), MADD_FLGS, 0, 1, 0)
 VSX_MADD(xsmsubadp, 1, float64, VsrD(0), MSUB_FLGS, 1, 1, 0)