diff mbox

[v2,3/6] softfloat: Fix single-to-half precision float conversions

Message ID 1297268850-5777-4-git-send-email-peter.maydell@linaro.org
State New
Headers show

Commit Message

Peter Maydell Feb. 9, 2011, 4:27 p.m. UTC
Fix various bugs in the single-to-half-precision conversion code:
 * input NaNs not correctly converted in IEEE mode
   (fixed by defining and using a commonNaNToFloat16())
 * wrong values returned when converting NaN/Inf into non-IEEE
   half precision value
 * wrong values returned for conversion of values which are
   on the boundary between denormal and zero for the half
   precision format
 * zeroes not correctly identified
 * excessively large results in non-IEEE mode should
   generate InvalidOp, not Overflow

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 fpu/softfloat-specialize.h |   20 ++++++++++++++++++++
 fpu/softfloat.c            |   30 ++++++++++++++++++------------
 2 files changed, 38 insertions(+), 12 deletions(-)

Comments

Aurelien Jarno Feb. 9, 2011, 6:41 p.m. UTC | #1
On Wed, Feb 09, 2011 at 04:27:27PM +0000, Peter Maydell wrote:
> Fix various bugs in the single-to-half-precision conversion code:
>  * input NaNs not correctly converted in IEEE mode
>    (fixed by defining and using a commonNaNToFloat16())
>  * wrong values returned when converting NaN/Inf into non-IEEE
>    half precision value
>  * wrong values returned for conversion of values which are
>    on the boundary between denormal and zero for the half
>    precision format
>  * zeroes not correctly identified
>  * excessively large results in non-IEEE mode should
>    generate InvalidOp, not Overflow
> 
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>  fpu/softfloat-specialize.h |   20 ++++++++++++++++++++
>  fpu/softfloat.c            |   30 ++++++++++++++++++------------
>  2 files changed, 38 insertions(+), 12 deletions(-)
> 
> diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
> index 2f65b9d..4907484 100644
> --- a/fpu/softfloat-specialize.h
> +++ b/fpu/softfloat-specialize.h
> @@ -116,6 +116,26 @@ float16 float16_maybe_silence_nan(float16 a)
>  }
>  
>  /*----------------------------------------------------------------------------
> +| Returns the result of converting the canonical NaN `a' to the half-
> +| precision floating-point format.
> +*----------------------------------------------------------------------------*/
> +
> +static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM)
> +{
> +    uint16_t mantissa = a.high>>54;
> +
> +    if (STATUS(default_nan_mode)) {
> +        return float16_default_nan;
> +    }
> +
> +    if (mantissa) {
> +        return ((((uint16_t) a.sign) << 15) | (0x1F << 10) | mantissa);
> +    } else {
> +        return float16_default_nan;
> +    }
> +}
> +
> +/*----------------------------------------------------------------------------
>  | The pattern for a default generated single-precision NaN.
>  *----------------------------------------------------------------------------*/
>  #if defined(TARGET_SPARC)
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index c3058f4..4d51428 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -2767,25 +2767,31 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
>      aExp = extractFloat32Exp( a );
>      aSign = extractFloat32Sign( a );
>      if ( aExp == 0xFF ) {
> -        if (aSig) {
> -            /* Make sure correct exceptions are raised.  */
> -            float32ToCommonNaN(a STATUS_VAR);
> -            aSig |= 0x00400000;
> +        if ( aSig ) {
> +            /* Input is a NaN */
> +            bits16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );

Here you should now use a float16 instead of bits16.

> +            if (!ieee) {
> +                return packFloat16(aSign, 0, 0);
> +            }
> +            return r;
>          }
> -        return packFloat16(aSign, 0x1f, aSig >> 13);
> +        /* Infinity */
> +        if (!ieee) {
> +            float_raise(float_flag_invalid STATUS_VAR);
> +            return packFloat16(aSign, 0x1f, 0x3ff);
> +        }
> +        return packFloat16(aSign, 0x1f, 0);
>      }
> -    if (aExp == 0 && aSign == 0) {
> +    if (aExp == 0 && aSig == 0) {
>          return packFloat16(aSign, 0, 0);
>      }
>      /* Decimal point between bits 22 and 23.  */
>      aSig |= 0x00800000;
>      aExp -= 0x7f;
>      if (aExp < -14) {
> -        mask = 0x007fffff;
> -        if (aExp < -24) {
> -            aExp = -25;
> -        } else {
> -            mask >>= 24 + aExp;
> +        mask = 0x00ffffff;
> +        if (aExp >= -24) {
> +            mask >>= 25 + aExp;
>          }
>      } else {
>          mask = 0x00001fff;
> @@ -2827,7 +2833,7 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
>          }
>      } else {
>          if (aExp > 16) {
> -            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
> +            float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
>              return packFloat16(aSign, 0x1f, 0x3ff);
>          }
>      }

Otherwise looks fine, nice fixes.
diff mbox

Patch

diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
index 2f65b9d..4907484 100644
--- a/fpu/softfloat-specialize.h
+++ b/fpu/softfloat-specialize.h
@@ -116,6 +116,26 @@  float16 float16_maybe_silence_nan(float16 a)
 }
 
 /*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the half-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM)
+{
+    uint16_t mantissa = a.high>>54;
+
+    if (STATUS(default_nan_mode)) {
+        return float16_default_nan;
+    }
+
+    if (mantissa) {
+        return ((((uint16_t) a.sign) << 15) | (0x1F << 10) | mantissa);
+    } else {
+        return float16_default_nan;
+    }
+}
+
+/*----------------------------------------------------------------------------
 | The pattern for a default generated single-precision NaN.
 *----------------------------------------------------------------------------*/
 #if defined(TARGET_SPARC)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index c3058f4..4d51428 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2767,25 +2767,31 @@  float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
     aExp = extractFloat32Exp( a );
     aSign = extractFloat32Sign( a );
     if ( aExp == 0xFF ) {
-        if (aSig) {
-            /* Make sure correct exceptions are raised.  */
-            float32ToCommonNaN(a STATUS_VAR);
-            aSig |= 0x00400000;
+        if ( aSig ) {
+            /* Input is a NaN */
+            bits16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+            if (!ieee) {
+                return packFloat16(aSign, 0, 0);
+            }
+            return r;
         }
-        return packFloat16(aSign, 0x1f, aSig >> 13);
+        /* Infinity */
+        if (!ieee) {
+            float_raise(float_flag_invalid STATUS_VAR);
+            return packFloat16(aSign, 0x1f, 0x3ff);
+        }
+        return packFloat16(aSign, 0x1f, 0);
     }
-    if (aExp == 0 && aSign == 0) {
+    if (aExp == 0 && aSig == 0) {
         return packFloat16(aSign, 0, 0);
     }
     /* Decimal point between bits 22 and 23.  */
     aSig |= 0x00800000;
     aExp -= 0x7f;
     if (aExp < -14) {
-        mask = 0x007fffff;
-        if (aExp < -24) {
-            aExp = -25;
-        } else {
-            mask >>= 24 + aExp;
+        mask = 0x00ffffff;
+        if (aExp >= -24) {
+            mask >>= 25 + aExp;
         }
     } else {
         mask = 0x00001fff;
@@ -2827,7 +2833,7 @@  float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
         }
     } else {
         if (aExp > 16) {
-            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+            float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
             return packFloat16(aSign, 0x1f, 0x3ff);
         }
     }