diff mbox

[v3,2/4] target-tilegx: Add single floating point implementation

Message ID 566988EA.109@emindsoft.com.cn
State New
Headers show

Commit Message

Chen Gang Dec. 10, 2015, 2:15 p.m. UTC
It passes gcc testsuite.

Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
---
 target-tilegx/helper-fsingle.c | 212 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 212 insertions(+)
 create mode 100644 target-tilegx/helper-fsingle.c

Comments

Richard Henderson Dec. 10, 2015, 8:18 p.m. UTC | #1
On 12/10/2015 09:15 AM, Richard Henderson wrote:
>   d = (uint64_t)sign << 63;
>   d = deposit64(d, 53, 11, exp);
>   d = deposit64(d, 21, 32, man);
>   return float64_to_float32(d, fp_status);

Hmm.  Actually, this incorrectly adds the implicit bit.  We'd actually need to
steal portions of softfloat.c to do this properly.  Which still isn't that
difficult.


r~
Chen Gang Dec. 10, 2015, 10:14 p.m. UTC | #2
On 12/11/15 01:15, Richard Henderson wrote:
> On 12/10/2015 06:15 AM, Chen Gang wrote:
>> +#define TILEGX_F_CALC_CVT   0     /* convert int to fsingle */
>> +#define TILEGX_F_CALC_NCVT  1     /* Not convertion */
>> +
>> +static uint32_t get_f32_exp(float32 f)
>> +{
>> +    return extract32(float32_val(f), 23, 8);
>> +}
>> +
>> +static void set_f32_exp(float32 *f, uint32_t exp)
>> +{
>> +    *f = make_float32(deposit32(float32_val(*f), 23, 8, exp));
>> +}
> 
> Why take a pointer instead of returning the new value?
>

I referenced set_* functions' declarations in "include/fpu/softfloat.h",
originally.

 
>> +static inline uint32_t get_fsingle_sign(uint64_t n)
>> +{
>> +    return test_bit(10, &n);
>> +}
>> +
>> +static inline void set_fsingle_sign(uint64_t *n)
>> +{
>> +    set_bit(10, n);
>> +}
> 
> Why are you using test_bit and set_bit here, rather than continuing to use
> deposit and extract?
> 

It is really only for one bit test and set, so test_bit/set_bit are
simpler and clearer than deposit/extract.


>> +static float32 sfmt_to_float32(uint64_t sfmt, float_status *fp_status)
>> +{
>> +    float32 f;
>> +    uint32_t sign = get_fsingle_sign(sfmt);
>> +    uint32_t man = get_fsingle_man(sfmt);
>> +
>> +    if (get_fsingle_calc(sfmt) == TILEGX_F_CALC_CVT) {
>> +        if (sign) {
>> +            return int32_to_float32(0 - man, fp_status);
>> +        } else {
>> +            return uint32_to_float32(man, fp_status);
>> +        }
>> +    } else {
>> +        f = float32_set_sign(float32_zero, sign);
>> +        f |= create_f32_man(man >> 8);
>> +        set_f32_exp(&f, get_fsingle_exp(sfmt));
>> +    }
> 
> I'm not especially keen on this calc bit.  I'd much rather that we always pack
> and round properly.
>

OK.
 
> In particular, if gcc decided to optimize fractional fixed-point types, it
> would do something very similar to the current floatsisf2 code sequence, except
> that it wouldn't use 0x9e as the exponent; it would use something smaller, so
> that some number of low bits of the mantessa would be below the radix point.
> 

Oh, really.

> Therefore, I think that fsingle_pack2 should do the following: Take the
> (sign,exp,man) tuple and slot them into a double -- recall that a single only
> has 23 bits in its mantessa, and this temp format has 32 -- then convert the
> double to a single.  Pre-rounded single results from fsingle_* will be
> unchanged, while integer data that gcc has constructed will be properly rounded.
> 
> E.g.
> 
>   uint32_t sign = get_fsingle_sign(sfmt);
>   uint32_t exp = get_fsingle_exp(sfmt);
>   uint32_t man = get_fsingle_man(sfmt);
>   uint64_t d;
> 
>   /* Adjust the exponent for double precision, preserving Inf/NaN.  */
>   if (exp == 0xff) {
>     exp = 0x7ff;
>   } else {
>     exp += 1023 - 127;
>   }
> 
>   d = (uint64_t)sign << 63;
>   d = deposit64(d, 53, 11, exp);
>   d = deposit64(d, 21, 32, man);
>   return float64_to_float32(d, fp_status);
> 
> Note that this does require float32_to_sfmt to store the mantissa
> left-justified. That is, not in bits [54-32] as you're doing now, but in bits
> [63-41].
> 

For me, it is a good idea! :-)


>> +static void ana_bits(float_status *fp_status,
>> +                     float32 fsrca, float32 fsrcb, uint64_t *sfmt)
> 
> Is "ana" supposed to be short for "analyze"?
>

Yes.
 
>> +{
>> +    if (float32_eq(fsrca, fsrcb, fp_status)) {
>> +        *sfmt |= create_fsfd_flag_eq();
>> +    } else {
>> +        *sfmt |= create_fsfd_flag_ne();
>> +    }
>> +
>> +    if (float32_lt(fsrca, fsrcb, fp_status)) {
>> +        *sfmt |= create_fsfd_flag_lt();
>> +    }
>> +    if (float32_le(fsrca, fsrcb, fp_status)) {
>> +        *sfmt |= create_fsfd_flag_le();
>> +    }
>> +
>> +    if (float32_lt(fsrcb, fsrca, fp_status)) {
>> +        *sfmt |= create_fsfd_flag_gt();
>> +    }
>> +    if (float32_le(fsrcb, fsrca, fp_status)) {
>> +        *sfmt |= create_fsfd_flag_ge();
>> +    }
>> +
>> +    if (float32_unordered(fsrca, fsrcb, fp_status)) {
>> +        *sfmt |= create_fsfd_flag_un();
>> +    }
>> +}
> 
> Again, I think it's better to return the new sfmt value than modify a pointer.
> 

Oh, I guess, we can inline ana_bits() to main_calc(), for they are both
simple short functions, and ana_bits() is only called by main_calc().

Thanks.
Chen Gang Dec. 10, 2015, 10:15 p.m. UTC | #3
On 12/11/15 04:18, Richard Henderson wrote:
> On 12/10/2015 09:15 AM, Richard Henderson wrote:
>>   d = (uint64_t)sign << 63;
>>   d = deposit64(d, 53, 11, exp);
>>   d = deposit64(d, 21, 32, man);
>>   return float64_to_float32(d, fp_status);
> 
> Hmm.  Actually, this incorrectly adds the implicit bit.  We'd actually need to
> steal portions of softfloat.c to do this properly.  Which still isn't that
> difficult.
> 

Yes, thanks.
Chen Gang Dec. 20, 2015, 3:30 p.m. UTC | #4
After tried, I guess, this way below is incorrect: float64_to_float32()
assumes the input 'd' is already a standard (packed) float64 variable.
But in fact, it is not (e.g. the input from floatsisf2).

And we have to still check TILEGX_F_CALC_CVT, for they are really two
different format: TILEGX_F_CALC_CVT has no HBIT, but TILEGX_F_CALC_NCVT
has HBIT (which we need process it specially).

For me, the way like helper_fdouble_pack2 (the double implementation) is
OK to TILEGX_F_CALC_NCVT format, too.

 - Shift left to get HBIT, and change the related vexp (use vexp instead
   of exp to process overflow cases -- like double implementation does).

 - Use (u)int32_to_float32 for the mantissa.

 - Then process exp again.


Thanks.

On 12/11/15 06:14, Chen Gang wrote:
>> In particular, if gcc decided to optimize fractional fixed-point types, it
>> > would do something very similar to the current floatsisf2 code sequence, except
>> > that it wouldn't use 0x9e as the exponent; it would use something smaller, so
>> > that some number of low bits of the mantessa would be below the radix point.
>> > 
> Oh, really.
> 
>> > Therefore, I think that fsingle_pack2 should do the following: Take the
>> > (sign,exp,man) tuple and slot them into a double -- recall that a single only
>> > has 23 bits in its mantessa, and this temp format has 32 -- then convert the
>> > double to a single.  Pre-rounded single results from fsingle_* will be
>> > unchanged, while integer data that gcc has constructed will be properly rounded.
>> > 
>> > E.g.
>> > 
>> >   uint32_t sign = get_fsingle_sign(sfmt);
>> >   uint32_t exp = get_fsingle_exp(sfmt);
>> >   uint32_t man = get_fsingle_man(sfmt);
>> >   uint64_t d;
>> > 
>> >   /* Adjust the exponent for double precision, preserving Inf/NaN.  */
>> >   if (exp == 0xff) {
>> >     exp = 0x7ff;
>> >   } else {
>> >     exp += 1023 - 127;
>> >   }
>> > 
>> >   d = (uint64_t)sign << 63;
>> >   d = deposit64(d, 53, 11, exp);
>> >   d = deposit64(d, 21, 32, man);
>> >   return float64_to_float32(d, fp_status);
>> > 
>> > Note that this does require float32_to_sfmt to store the mantissa
>> > left-justified. That is, not in bits [54-32] as you're doing now, but in bits
>> > [63-41].
>> > 
> For me, it is a good idea! :-)
> 
>
Richard Henderson Dec. 21, 2015, 3:01 p.m. UTC | #5
On 12/20/2015 07:30 AM, Chen Gang wrote:
> And we have to still check TILEGX_F_CALC_CVT, for they are really two
> different format: TILEGX_F_CALC_CVT has no HBIT, but TILEGX_F_CALC_NCVT
> has HBIT (which we need process it specially).

The both do, in that you re-normalize to produce that HBIT.
That's the whole point.


r~
Chen Gang Dec. 21, 2015, 6:54 p.m. UTC | #6
On 12/21/15 23:01, Richard Henderson wrote:
> On 12/20/2015 07:30 AM, Chen Gang wrote:
>> And we have to still check TILEGX_F_CALC_CVT, for they are really two
>> different format: TILEGX_F_CALC_CVT has no HBIT, but TILEGX_F_CALC_NCVT
>> has HBIT (which we need process it specially).
> 
> The both do, in that you re-normalize to produce that HBIT.
> That's the whole point.
> 

Oh, yes.

But all together, we want to normalize the float value in fsingle_pack2,
so we can not use float64_to_float32(), it assumes the input is already
normalized (if we can let the input normalized, we will return directly).

Thanks.
Richard Henderson Dec. 22, 2015, 2 a.m. UTC | #7
On 12/21/2015 10:54 AM, Chen Gang wrote:
>> The both do, in that you re-normalize to produce that HBIT.
>> That's the whole point.
>>
>
> Oh, yes.
>
> But all together, we want to normalize the float value in fsingle_pack2,
> so we can not use float64_to_float32()...

Of course not.  I told you that you couldn't.


r~
Chen Gang Dec. 22, 2015, 10:29 p.m. UTC | #8
On 12/11/15 06:15, Chen Gang wrote:
> 
> On 12/11/15 04:18, Richard Henderson wrote:
>> On 12/10/2015 09:15 AM, Richard Henderson wrote:
>>>   d = (uint64_t)sign << 63;
>>>   d = deposit64(d, 53, 11, exp);
>>>   d = deposit64(d, 21, 32, man);
>>>   return float64_to_float32(d, fp_status);
>>
>> Hmm.  Actually, this incorrectly adds the implicit bit.  We'd actually need to
>> steal portions of softfloat.c to do this properly.  Which still isn't that
>> difficult.
>>

Oh, sorry, I misunderstood this reply.

> 
> Yes, thanks.
> 

Thanks.
diff mbox

Patch

diff --git a/target-tilegx/helper-fsingle.c b/target-tilegx/helper-fsingle.c
new file mode 100644
index 0000000..a33837e
--- /dev/null
+++ b/target-tilegx/helper-fsingle.c
@@ -0,0 +1,212 @@ 
+/*
+ * QEMU TILE-Gx helpers
+ *
+ *  Copyright (c) 2015 Chen Gang
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see
+ * <http://www.gnu.org/licenses/lgpl-2.1.html>
+ */
+
+#include "cpu.h"
+#include "qemu-common.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+
+#include "helper-fshared.c"
+
+/*
+ * FSingle instructions implementation:
+ *
+ * fsingle_add1         ; calc srca and srcb,
+ *                      ; convert float_32 to TileGXFPSFmt result.
+ *                      ; move TileGXFPSFmt result to dest.
+ *
+ * fsingle_sub1         ; calc srca and srcb.
+ *                      ; convert float_32 to TileGXFPSFmt result.
+ *                      ; move TileGXFPSFmt result to dest.
+ *
+ * fsingle_addsub2      ; nop.
+ *
+ * fsingle_mul1         ; calc srca and srcb.
+ *                      ; convert float_32 value to TileGXFPSFmt result.
+ *                      ; move TileGXFPSFmt result to dest.
+ *
+ * fsingle_mul2         ; move srca to dest.
+ *
+ * fsingle_pack1        ; nop
+ *
+ * fsingle_pack2        ; treat srca as TileGXFPSFmt result.
+ *                      ; convert TileGXFPSFmt result to float_32 value.
+ *                      ; move float_32 value to dest.
+ */
+
+#define TILEGX_F_CALC_CVT   0     /* convert int to fsingle */
+#define TILEGX_F_CALC_NCVT  1     /* fsingle calc result, not a conversion */
+
+/* Return the 8-bit biased exponent field (bits 30..23) of f. */
+static uint32_t get_f32_exp(float32 f)
+{
+    uint32_t raw = float32_val(f);
+
+    return (raw >> 23) & 0xff;
+}
+
+/*
+ * Replace the exponent field (bits 30..23) of *f with the low 8 bits of
+ * exp, leaving the sign and mantissa bits untouched.
+ */
+static void set_f32_exp(float32 *f, uint32_t exp)
+{
+    const uint32_t exp_mask = 0xffu << 23;
+    uint32_t raw = float32_val(*f);
+
+    raw = (raw & ~exp_mask) | ((exp << 23) & exp_mask);
+    *f = make_float32(raw);
+}
+
+/* Return the 23-bit mantissa (fraction) field, bits 22..0, of f. */
+static uint32_t get_f32_man(float32 f)
+{
+    return extract32(float32_val(f), 0, 23);
+}
+
+/* Build a float32 whose only non-zero bits are the low 23 bits of man. */
+static float32 create_f32_man(uint32_t man)
+{
+    uint32_t fraction = extract32(man, 0, 23);
+
+    return make_float32(fraction);
+}
+
+/* Return the exponent held in the low 8 bits of the TileGXFPSFmt value n. */
+static inline uint32_t get_fsingle_exp(uint64_t n)
+{
+    /* Truncating to 8 bits keeps exactly bits 7..0. */
+    return (uint8_t)n;
+}
+
+/*
+ * Return a TileGXFPSFmt fragment holding the low 8 bits of exp in the
+ * exponent field (bits 7..0); all other bits are zero.
+ */
+static inline uint64_t create_fsingle_exp(uint32_t exp)
+{
+    uint64_t field = exp;
+
+    return field & 0xff;
+}
+
+/*
+ * Return the sign flag (bit 10) of the TileGXFPSFmt value n as 0 or 1.
+ *
+ * The bit is read with extract64() rather than test_bit(): test_bit()
+ * operates on an array of unsigned long, which only matches uint64_t on
+ * hosts where unsigned long is 64 bits wide.
+ */
+static inline uint32_t get_fsingle_sign(uint64_t n)
+{
+    return extract64(n, 10, 1);
+}
+
+/*
+ * Set the sign flag (bit 10) of the TileGXFPSFmt value at *n.
+ *
+ * deposit64() is used instead of set_bit(), whose argument is an array of
+ * unsigned long and therefore only matches uint64_t on hosts where
+ * unsigned long is 64 bits wide.
+ */
+static inline void set_fsingle_sign(uint64_t *n)
+{
+    *n = deposit64(*n, 10, 1, 1);
+}
+
+/*
+ * Return the calc flag (bit 11) of the TileGXFPSFmt value n, i.e. one of
+ * TILEGX_F_CALC_CVT (0) or TILEGX_F_CALC_NCVT (1).
+ *
+ * extract64() is used rather than test_bit(), which expects a pointer to
+ * unsigned long and so is not portable when handed a uint64_t.
+ */
+static inline unsigned int get_fsingle_calc(uint64_t n)
+{
+    return extract64(n, 11, 1);
+}
+
+/*
+ * Store calc (TILEGX_F_CALC_CVT or TILEGX_F_CALC_NCVT) in the calc flag,
+ * bit 11, of the TileGXFPSFmt value at *n.
+ *
+ * The value is deposited instead of the bit being set unconditionally, so
+ * passing TILEGX_F_CALC_CVT really clears the flag.  Only the low bit of
+ * calc is used.
+ */
+static inline void set_fsingle_calc(uint64_t *n, uint32_t calc)
+{
+    *n = deposit64(*n, 11, 1, calc);
+}
+
+/* Return the 32-bit mantissa word kept in bits 63..32 of n. */
+static inline unsigned int get_fsingle_man(uint64_t n)
+{
+    uint64_t upper = n >> 32;
+
+    return (unsigned int)upper;
+}
+
+/*
+ * Return a TileGXFPSFmt fragment with man placed in the mantissa word
+ * (bits 63..32); the low 32 bits are zero.
+ */
+static inline uint64_t create_fsingle_man(uint32_t man)
+{
+    uint64_t field = man;
+
+    return field << 32;
+}
+
+/*
+ * Convert a float32 into the intermediate TileGXFPSFmt layout:
+ *
+ *   bit  63      leading mantissa bit, always set here
+ *   bits 62..40  the 23 fraction bits of f
+ *   bit  10      sign flag
+ *   bits  7..0   biased exponent of f
+ *
+ * NOTE(review): the leading mantissa bit is set unconditionally, including
+ * for zero and denormal inputs whose exponent field is 0 -- confirm that
+ * this is intended.
+ */
+static uint64_t float32_to_sfmt(float32 f)
+{
+    uint64_t sfmt = 0;
+    /* 1U, not 1: shifting a signed 1 into the sign bit is undefined. */
+    uint32_t man = (get_f32_man(f) << 8) | (1U << 31);
+
+    if (float32_is_neg(f)) {
+        set_fsingle_sign(&sfmt);
+    }
+    sfmt |= create_fsingle_exp(get_f32_exp(f));
+    sfmt |= create_fsingle_man(man);
+
+    return sfmt;
+}
+
+/*
+ * Convert a TileGXFPSFmt value back into a float32.
+ *
+ * When the calc flag is TILEGX_F_CALC_CVT, the mantissa word is treated as
+ * the magnitude of an integer and converted through softfloat, negated
+ * first if the sign flag is set.  Otherwise the sign flag, the exponent
+ * field and mantissa-word bits 30..8 are packed directly into a float32
+ * without any rounding.
+ */
+static float32 sfmt_to_float32(uint64_t sfmt, float_status *fp_status)
+{
+    uint32_t sign = get_fsingle_sign(sfmt);
+    uint32_t man = get_fsingle_man(sfmt);
+    float32 f;
+
+    if (get_fsingle_calc(sfmt) == TILEGX_F_CALC_CVT) {
+        if (sign) {
+            return int32_to_float32(0 - man, fp_status);
+        }
+        return uint32_to_float32(man, fp_status);
+    }
+
+    /*
+     * Combine the raw bit patterns through float32_val()/make_float32()
+     * instead of OR-ing float32 objects, so that this still builds when
+     * float32 is a struct (USE_SOFTFLOAT_STRUCT_TYPES).
+     */
+    f = float32_set_sign(float32_zero, sign);
+    f = make_float32(float32_val(f) | float32_val(create_f32_man(man >> 8)));
+    set_f32_exp(&f, get_fsingle_exp(sfmt));
+
+    return f;
+}
+
+/*
+ * fsingle_pack2 helper: treat srca as a TileGXFPSFmt value, convert it to
+ * float32 and return the float32 bit pattern.
+ */
+uint64_t helper_fsingle_pack2(CPUTLGState *env, uint64_t srca)
+{
+    float32 packed = sfmt_to_float32(srca, &env->fp_status);
+
+    return float32_val(packed);
+}
+
+/*
+ * Compare fsrca with fsrcb and OR the matching comparison flags
+ * (eq or ne, lt, le, gt, ge, unordered) into *sfmt.
+ */
+static void ana_bits(float_status *fp_status,
+                     float32 fsrca, float32 fsrcb, uint64_t *sfmt)
+{
+    uint64_t flags;
+
+    /* Exactly one of eq/ne is always reported. */
+    flags = float32_eq(fsrca, fsrcb, fp_status) ? create_fsfd_flag_eq()
+                                                : create_fsfd_flag_ne();
+
+    if (float32_lt(fsrca, fsrcb, fp_status)) {
+        flags |= create_fsfd_flag_lt();
+    }
+    if (float32_le(fsrca, fsrcb, fp_status)) {
+        flags |= create_fsfd_flag_le();
+    }
+
+    /* gt/ge are obtained by swapping the operands of lt/le. */
+    if (float32_lt(fsrcb, fsrca, fp_status)) {
+        flags |= create_fsfd_flag_gt();
+    }
+    if (float32_le(fsrcb, fsrca, fp_status)) {
+        flags |= create_fsfd_flag_ge();
+    }
+
+    if (float32_unordered(fsrca, fsrcb, fp_status)) {
+        flags |= create_fsfd_flag_un();
+    }
+
+    *sfmt |= flags;
+}
+
+/*
+ * Shared body of the fsingle_{add,sub,mul}1 helpers: apply calc to the two
+ * operands, convert the result to TileGXFPSFmt, OR in the comparison flags
+ * of the operands and mark the value as TILEGX_F_CALC_NCVT.
+ */
+static uint64_t main_calc(float_status *fp_status,
+                          float32 fsrca, float32 fsrcb,
+                          float32 (*calc)(float32, float32, float_status *))
+{
+    float32 result = calc(fsrca, fsrcb, fp_status);
+    uint64_t sfmt = float32_to_sfmt(result);
+
+    ana_bits(fp_status, fsrca, fsrcb, &sfmt);
+    set_fsingle_calc(&sfmt, TILEGX_F_CALC_NCVT);
+
+    return sfmt;
+}
+
+/*
+ * fsingle_add1 helper: add srca and srcb as float32 values and return the
+ * result in TileGXFPSFmt form.
+ */
+uint64_t helper_fsingle_add1(CPUTLGState *env, uint64_t srca, uint64_t srcb)
+{
+    float32 a = make_float32(srca);
+    float32 b = make_float32(srcb);
+
+    return main_calc(&env->fp_status, a, b, float32_add);
+}
+
+/*
+ * fsingle_sub1 helper: subtract srcb from srca as float32 values and
+ * return the result in TileGXFPSFmt form.
+ */
+uint64_t helper_fsingle_sub1(CPUTLGState *env, uint64_t srca, uint64_t srcb)
+{
+    float32 a = make_float32(srca);
+    float32 b = make_float32(srcb);
+
+    return main_calc(&env->fp_status, a, b, float32_sub);
+}
+
+/*
+ * fsingle_mul1 helper: multiply srca and srcb as float32 values and
+ * return the result in TileGXFPSFmt form.
+ */
+uint64_t helper_fsingle_mul1(CPUTLGState *env, uint64_t srca, uint64_t srcb)
+{
+    float32 a = make_float32(srca);
+    float32 b = make_float32(srcb);
+
+    return main_calc(&env->fp_status, a, b, float32_mul);
+}