Patchwork [2/3] target-arm: fix support for vrecpe.

login
register
mail settings
Submitter Christophe LYON
Date Feb. 16, 2011, 5:59 p.m.
Message ID <1297879186-12670-3-git-send-email-christophe.lyon@st.com>
Download mbox | patch
Permalink /patch/83383/
State New
Headers show

Comments

Christophe LYON - Feb. 16, 2011, 5:59 p.m.
From: Christophe Lyon <christophe.lyon@st.com>

Now use the same algorithm as described in the ARM ARM.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/helper.c |   84 +++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 72 insertions(+), 12 deletions(-)
Peter Maydell - Feb. 17, 2011, 12:23 p.m.
On 16 February 2011 17:59,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Now use the same algorithm as described in the ARM ARM.
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>

Mostly looks good, and seems to pass random testing.

> +    float_status *s = &env->vfp.standard_fp_status;
> +    float64 one = int64_to_float64(1, s);

You don't need to create a variable for this, float64_one
will do what you want.

> +    /* q = (int)(a * 512.0) */
> +    float64 x512 = int64_to_float64(512, s);
> +    float64 q = float64_mul(x512, a, s);
> +    int64_t q_int = float64_to_int64_round_to_zero(q, s);
> +
> +    /* r = 1.0 / (((double)q + 0.5) / 512.0) */
> +    q = int64_to_float64(q_int, s);
> +    float64 half = float64_div(one, int64_to_float64(2, s), s);

...and a runtime division just to get a constant 0.5?
Better to just make_float64() on the appropriate bit
pattern, I think.

>  float32 HELPER(recpe_f32)(float32 a, CPUState *env)
>  {
> -    float_status *s = &env->vfp.fp_status;
> -    float32 one = int32_to_float32(1, s);
> -    return float32_div(one, a, s);
> +    float_status *s = &env->vfp.standard_fp_status;
> +    float64 f64;
> +    uint32_t val32 = float32_val(a);
> +
> +    int result_exp;
> +    int a_exp = (val32  & 0x7f800000) >> 23;
> +    int sign = val32 & 0x80000000;
> +
> +    if (float32_is_any_nan(a)) {
> +        return float32_default_nan;

This won't set InvalidOp if the input is a signalling NaN.

-- PMM
Christophe LYON - Feb. 17, 2011, 5:51 p.m.
>> +    float_status *s = &env->vfp.standard_fp_status;
>> +    float64 one = int64_to_float64(1, s);
> You don't need to create a variable for this, float64_one
> will do what you want.
OK

 
>> +    /* q = (int)(a * 512.0) */
>> +    float64 x512 = int64_to_float64(512, s);
>> +    float64 q = float64_mul(x512, a, s);
>> +    int64_t q_int = float64_to_int64_round_to_zero(q, s);
>> +
>> +    /* r = 1.0 / (((double)q + 0.5) / 512.0) */
>> +    q = int64_to_float64(q_int, s);
>> +    float64 half = float64_div(one, int64_to_float64(2, s), s);
> ...and a runtime division just to get a constant 0.5?
> Better to just make_float64() on the appropriate bit
> pattern, I think.

It makes sense.
Then, what about using the right bit patterns for 512 and 256?

Actually, for these last two, I mimicked recps and rsqrts, which build constants 2 and 3. I could add another patch to address this point.
 

>> +    if (float32_is_any_nan(a)) {
>> +        return float32_default_nan;
> This won't set InvalidOp if the input is a signalling NaN.
Thanks for pointing that out; I have no means of testing those cases.

Christophe.
Peter Maydell - Feb. 17, 2011, 6:35 p.m.
On 17 February 2011 17:51, Christophe Lyon <christophe.lyon@st.com> wrote:

>> ...and a runtime division just to get a constant 0.5?
>> Better to just make_float64() on the appropriate bit
>> pattern, I think.
>
> It makes sense.
> Then, what about using the right bit patterns for 512 and 256?

Not so worried about those -- it's really the division I
wanted to avoid, int-to-float isn't as expensive.

-- PMM

Patch

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 7f63a28..7751d21 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2687,13 +2687,68 @@  float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env)
 
 /* NEON helpers.  */
 
-/* TODO: The architecture specifies the value that the estimate functions
-   should return.  We return the exact reciprocal/root instead.  */
+/* The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM.
+ */
+static float64 recip_estimate(float64 a, CPUState *env)
+{
+    float_status *s = &env->vfp.standard_fp_status;
+    float64 one = int64_to_float64(1, s);
+    /* q = (int)(a * 512.0) */
+    float64 x512 = int64_to_float64(512, s);
+    float64 q = float64_mul(x512, a, s);
+    int64_t q_int = float64_to_int64_round_to_zero(q, s);
+
+    /* r = 1.0 / (((double)q + 0.5) / 512.0) */
+    q = int64_to_float64(q_int, s);
+    float64 half = float64_div(one, int64_to_float64(2, s), s);
+    q = float64_add(q, half, s);
+    q = float64_div(q, x512, s);
+    q = float64_div(one, q, s);
+
+    /* s = (int)(256.0 * r + 0.5) */
+    float64 x256 = int64_to_float64(256, s);
+    q = float64_mul(q, x256, s);
+    q = float64_add(q, half, s);
+    q_int = float64_to_int64_round_to_zero(q, s);
+
+    /* return (double)s / 256.0 */
+    return float64_div(int64_to_float64(q_int, s), x256, s);
+}
+
 float32 HELPER(recpe_f32)(float32 a, CPUState *env)
 {
-    float_status *s = &env->vfp.fp_status;
-    float32 one = int32_to_float32(1, s);
-    return float32_div(one, a, s);
+    float_status *s = &env->vfp.standard_fp_status;
+    float64 f64;
+    uint32_t val32 = float32_val(a);
+
+    int result_exp;
+    int a_exp = (val32  & 0x7f800000) >> 23;
+    int sign = val32 & 0x80000000;
+
+    if (float32_is_any_nan(a)) {
+        return float32_default_nan;
+    } else if (float32_is_infinity(a)) {
+        return float32_set_sign(float32_zero, float32_is_neg(a));
+    } else if (float32_is_zero_or_denormal(a)) {
+        float_raise(float_flag_divbyzero, s);
+        return float32_set_sign(float32_infinity, float32_is_neg(a));
+    } else if (a_exp >= 253) {
+        float_raise(float_flag_underflow, s);
+        return float32_set_sign(float32_zero, float32_is_neg(a));
+    }
+
+    f64 = make_float64((0x3feULL << 52)
+                       | ((int64_t)(val32 & 0x7fffff) << 29));
+
+    result_exp = 253 - a_exp;
+
+    f64 = recip_estimate(f64, env);
+
+    val32 = sign
+        | ((result_exp & 0xff) << 23)
+        | ((float64_val(f64) >> 29) & 0x7fffff);
+    return make_float32(val32);
 }
 
 float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
@@ -2705,13 +2760,18 @@  float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
 
 uint32_t HELPER(recpe_u32)(uint32_t a, CPUState *env)
 {
-    float_status *s = &env->vfp.fp_status;
-    float32 tmp;
-    tmp = int32_to_float32(a, s);
-    tmp = float32_scalbn(tmp, -32, s);
-    tmp = helper_recpe_f32(tmp, env);
-    tmp = float32_scalbn(tmp, 31, s);
-    return float32_to_int32(tmp, s);
+    float64 f64;
+
+    if ((a & 0x80000000) == 0) {
+        return 0xffffffff;
+    }
+
+    f64 = make_float64((0x3feULL << 52)
+                       | ((int64_t)(a & 0x7fffffff) << 21));
+
+    f64 = recip_estimate (f64, env);
+
+    return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
 }
 
 uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUState *env)