diff mbox

[V2] Fix float64_to_uint64

Message ID 525F00C1.5050002@gmail.com
State New
Headers show

Commit Message

Tom Musta Oct. 16, 2013, 9:10 p.m. UTC
The comment preceding the float64_to_uint64 routine suggests that
the implementation is broken.  And this is, indeed, the case.

This patch properly implements the conversion of a 64-bit floating
point number to an unsigned, 64 bit integer.

Note that the patch does not pass scripts/checkpatch.pl because it
maintains the coding style of fpu/softfloat.c.

V2: This contribution can be licensed under either the softfloat-2a or -2b
license.

---
  fpu/softfloat.c |   92 ++++++++++++++++++++++++++++++++++++++++++++++++++----
  1 files changed, 85 insertions(+), 7 deletions(-)


@@ -6536,18 +6576,56 @@ uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
      return res;
  }

-/* FIXME: This looks broken.  */
-uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit unsigned integer format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic---
+| which means in particular that the conversion is rounded according to the
+| current rounding mode.  If `a' is a NaN of either sign, or if the conversion
+| overflows, the invalid exception is raised and the largest unsigned integer
+| is returned.  Any other negative `a' yields zero: invalid is raised unless
+| `a' is a zero or a denormal, and a nonzero denormal raises inexact.
+*----------------------------------------------------------------------------*/
+
+uint64_t float64_to_uint64( float64 a STATUS_PARAM )
  {
-    int64_t v;
+    flag aSign;
+    int_fast16_t aExp, shiftCount;
+    uint64_t aSig, aSigExtra;
+    a = float64_squash_input_denormal(a STATUS_VAR);

-    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
-    v += float64_val(a);
-    v = float64_to_int64(make_float64(v) STATUS_VAR);
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    /* NaNs of either sign fall through to the overflow path below. */
+    if ( aSign && ! ( ( aExp == 0x7FF ) && aSig ) ) {
+        if ( aExp ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+        } else if ( aSig ) { /* negative denormalized */
+            float_raise( float_flag_inexact STATUS_VAR);
+        }
+        return 0;
+    }
+    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+    shiftCount = 0x433 - aExp;
+    if ( shiftCount <= 0 ) {
+        if ( 0x43E < aExp ) {
+            /* aExp > 0x43E: NaN, infinity, or a value of at least 2^64. */
+            /* None fits in 64 bits, so invalid is raised unconditionally. */
+            float_raise( float_flag_invalid STATUS_VAR);
+            return LIT64( 0xFFFFFFFFFFFFFFFF );
+        }
+        aSigExtra = 0;
+        aSig <<= - shiftCount;
+    } else {
+        /* Fraction bits present: the helper fills aSigExtra for rounding. */
+        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+    }
+    return roundAndPackUint64( aSig, aSigExtra STATUS_VAR );

-    return v - INT64_MIN;
  }

+
  uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
  {
      int64_t v;

Comments

Alexander Graf Oct. 17, 2013, 9:40 a.m. UTC | #1
On 16.10.2013, at 23:10, Tom Musta <tommusta@gmail.com> wrote:

> The comment preceding the float64_to_uint64 routine suggests that
> the implementation is broken.  And this is, indeed, the case.
> 
> This patch properly implements the conversion of a 64-bit floating
> point number to an unsigned, 64 bit integer.
> 
> Note that the patch does not pass scripts/checkpatch.pl because it
> maintains the coding style of fpu/softfloat.c.
> 
> V2: This contribution can be licensed under either the softfloat-2a or -2b
> license.

Missing a SoB line.


Alex

> 
> ---
> fpu/softfloat.c |   92 ++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 files changed, 85 insertions(+), 7 deletions(-)
> 
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index 7ba51b6..f8c7f92 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -204,6 +204,46 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU
> }
> 
> /*----------------------------------------------------------------------------
> +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
> +| `absZ1', with binary point between bits 63 and 64 (between the input words),
> +| and returns the properly rounded 64-bit unsigned integer corresponding to the
> +| input.  Ordinarily, the fixed-point input is simply rounded to an integer,
> +| with the inexact exception raised if the input cannot be represented exactly
> +| as an integer.  However, if the fixed-point input is too large, the invalid
> +| exception is raised and the largest unsigned integer is returned.
> +*----------------------------------------------------------------------------*/
> +
> +static int64 roundAndPackUint64( uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
> +{
> +    int8 roundingMode;
> +    flag roundNearestEven, increment;
> +    int64_t z;
> +
> +    roundingMode = STATUS(float_rounding_mode);
> +    roundNearestEven = ( roundingMode == float_round_nearest_even );
> +    increment = ( (int64_t) absZ1 < 0 );
> +    if ( ! roundNearestEven ) {
> +        if ( roundingMode == float_round_to_zero ) {
> +            increment = 0;
> +        }
> +        else {
> +            increment = ( roundingMode == float_round_up ) && absZ1;
> +        }
> +    }
> +    if ( increment ) {
> +        ++absZ0;
> +        if ( absZ0 == 0 ) {
> +            float_raise( float_flag_invalid STATUS_VAR);
> +            return LIT64( 0xFFFFFFFFFFFFFFFF );
> +        }
> +        absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
> +    }
> +    z = absZ0;
> +    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
> +    return z;
> +}
> +
> +/*----------------------------------------------------------------------------
> | Returns the fraction bits of the single-precision floating-point value `a'.
> *----------------------------------------------------------------------------*/
> 
> @@ -6536,18 +6576,56 @@ uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
>     return res;
> }
> 
> -/* FIXME: This looks broken.  */
> -uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
> +/*----------------------------------------------------------------------------
> +| Returns the result of converting the double-precision floating-point value
> +| `a' to the 64-bit unsigned integer format.  The conversion is
> +| performed according to the IEC/IEEE Standard for Binary Floating-Point
> +| Arithmetic---which means in particular that the conversion is rounded
> +| according to the current rounding mode.  If `a' is a NaN, the largest
> +| positive integer is returned.  If the conversion overflows, the
> +| largest unsigned integer is returned.  If 'a' is negative, zero is
> +| returned.
> +*----------------------------------------------------------------------------*/
> +
> +uint64_t float64_to_uint64( float64 a STATUS_PARAM )
> {
> -    int64_t v;
> +    flag aSign;
> +    int_fast16_t aExp, shiftCount;
> +    uint64_t aSig, aSigExtra;
> +    a = float64_squash_input_denormal(a STATUS_VAR);
> 
> -    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
> -    v += float64_val(a);
> -    v = float64_to_int64(make_float64(v) STATUS_VAR);
> +    aSig = extractFloat64Frac( a );
> +    aExp = extractFloat64Exp( a );
> +    aSign = extractFloat64Sign( a );
> +    if ( aSign ) {
> +        if ( aExp ) {
> +            float_raise( float_flag_invalid STATUS_VAR);
> +        } else if ( aSig ) { /* negative denormalized */
> +            float_raise( float_flag_inexact STATUS_VAR);
> +        }
> +        return 0;
> +    }
> +    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
> +    shiftCount = 0x433 - aExp;
> +    if ( shiftCount <= 0 ) {
> +        if ( 0x43E < aExp ) {
> +            if ( ( aSig != LIT64( 0x0010000000000000 ) ) ||
> +                 ( aExp == 0x7FF ) ) {
> +                float_raise( float_flag_invalid STATUS_VAR);
> +            }
> +            return LIT64( 0xFFFFFFFFFFFFFFFF );
> +        }
> +        aSigExtra = 0;
> +        aSig <<= - shiftCount;
> +    }
> +    else {
> +        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
> +    }
> +    return roundAndPackUint64( aSig, aSigExtra STATUS_VAR );
> 
> -    return v - INT64_MIN;
> }
> 
> +
> uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
> {
>     int64_t v;
> -- 
> 1.7.1
> 
> 
>
Stefan Weil Oct. 17, 2013, 4:31 p.m. UTC | #2
Am 17.10.2013 11:40, schrieb Alexander Graf:
> On 16.10.2013, at 23:10, Tom Musta <tommusta@gmail.com> wrote:
>
>> The comment preceding the float64_to_uint64 routine suggests that
>> the implementation is broken.  And this is, indeed, the case.
>>
>> This patch properly implements the conversion of a 64-bit floating
>> point number to an unsigned, 64 bit integer.
>>
>> Note that the patch does not pass scripts/checkpatch.pl because it
>> maintains the coding style of fpu/softfloat.c.
>>
>> V2: This contribution can be licensed under either the softfloat-2a or -2b
>> license.
> Missing a SoB line.
>
>
> Alex

There is already a mix of coding styles in fpu/softfloat.c, and your
patch adds large regions of new code.
Therefore I expect that such contributions should respect the QEMU
coding style.

The situation is different if only single lines in some function are
replaced or added.

Stefan
Tom Musta Oct. 17, 2013, 5:58 p.m. UTC | #3
> On 10/17/2013 11:31 AM, Stefan Weil wrote:
>> Am 17.10.2013 11:40, schrieb Alexander Graf:
>> Missing a SoB line.
>>
>>
>> Alex
>
> There is already a mix of coding styles in fpu/softfloat.c, and your
> patch adds large regions of new code.
> Therefore I expect that such contributions should respect the QEMU
> coding style.
>
> The situation is different if only single lines in some function are
> replaced or added.
>
> Stefan
>

OK .... I will rework the patch to use QEMU style.  And will add the sob.
diff mbox

Patch

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 7ba51b6..f8c7f92 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -204,6 +204,46 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU
  }

  /*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit unsigned integer corresponding to the
+| input.  Ordinarily, the fixed-point input is simply rounded to an integer,
+| with the inexact exception raised if the input cannot be represented exactly
+| as an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static uint64_t roundAndPackUint64( uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment;
+
+    roundingMode = STATUS(float_rounding_mode);
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    /* Nearest-even default: increment when the fraction is at least 1/2. */
+    increment = ( (int64_t) absZ1 < 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            increment = 0;
+        } else {
+            /* The magnitude is unsigned, so only round-up can increment. */
+            increment = ( roundingMode == float_round_up ) && absZ1;
+        }
+    }
+    if ( increment ) {
+        ++absZ0;
+        if ( absZ0 == 0 ) {
+            float_raise( float_flag_invalid STATUS_VAR);
+            return LIT64( 0xFFFFFFFFFFFFFFFF );
+        }
+        /* An exact tie in nearest-even mode must give an even result. */
+        absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
+    }
+    if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+    return absZ0;
+}
+
+/*----------------------------------------------------------------------------
  | Returns the fraction bits of the single-precision floating-point value `a'.
  *----------------------------------------------------------------------------*/