Patchwork [3/3] target-arm: fix support for vrsqrte.

login
register
mail settings
Submitter Christophe LYON
Date Feb. 16, 2011, 5:59 p.m.
Message ID <1297879186-12670-4-git-send-email-christophe.lyon@st.com>
Download mbox | patch
Permalink /patch/83380/
State New
Headers show

Comments

Christophe LYON - Feb. 16, 2011, 5:59 p.m.
From: Christophe Lyon <christophe.lyon@st.com>

Now use the same algorithm as described in the ARM ARM.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/helper.c |  122 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 112 insertions(+), 10 deletions(-)
Peter Maydell - Feb. 17, 2011, 12:49 p.m.
On 16 February 2011 17:59,  <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Now use the same algorithm as described in the ARM ARM.

This doesn't pass random testing.

> +static float64 recip_sqrt_estimate(float64 a, CPUState *env)
> +{
> +    float_status *s = &env->vfp.standard_fp_status;
> +    float64 one = int64_to_float64(1, s);
> +    float64 half = float64_div(one, int64_to_float64(2, s), s);

Same remarks about one and half apply as for 2/3.
Maybe we should put a float64_half in softfloat.h.

>  float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
>  {
> -    float_status *s = &env->vfp.fp_status;
> -    float32 one = int32_to_float32(1, s);

Use float32_one.

> +    if (float32_is_any_nan(a)) {
> +        return float32_default_nan;

Should raise InvalidOp for an SNaN.

> +    } else if (float32_is_zero(a)) {
> +        float_raise(float_flag_divbyzero, s);
> +        return float32_set_sign(float32_infinity, float32_is_neg(a));

Should use float32_is_zero_or_denormal()
so we handle denormals properly.

> +    } else if (val < 0) {

Use if (float32_is_neg(a)). Then you can make
val and val64 uint32_t and uint64_t respectively,
rather than the signed types, which makes more sense
given these are really just bit patterns.

> +    if ((val & 0x800000) == 0) {
> +        f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
> +                           | (0x3feULL << 52)
> +                           | ((uint64_t)(val & 0x7ffffff) << 29));
> +    } else {
> +        f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
> +                           | (0x3fdULL << 52)
> +                           | ((uint64_t)(val & 0x7ffffff) << 29));
> +    }

These are both wrong -- the ARM ARM says f64
gets operand<31> : ‘01111111110’ : operand<22:0> : Zeros(29);
(or similar for the other case), which means you
need to mask with 0x7fffff, not 0x7ffffff. (Count the 'f's!)

If you fix this and the denormal case then it passes
random testing.

-- PMM

Patch

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 7751d21..f0f2231 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2751,11 +2751,105 @@  float32 HELPER(recpe_f32)(float32 a, CPUState *env)
     return make_float32(val32);
 }
 
+/* Estimate 1/sqrt(a), expecting 0.25 <= a < 1.0; result is in units of 1/256.
+ * The algorithm that must be used is specified by the ARM ARM; its
+ * pseudocode is quoted step by step in the comments below. */
+static float64 recip_sqrt_estimate(float64 a, CPUState *env)
+{
+    float_status *s = &env->vfp.standard_fp_status;
+    float64 one = int64_to_float64(1, s);
+    float64 half = float64_div(one, int64_to_float64(2, s), s);
+    float64 x256 = int64_to_float64(256, s);
+    float64 q;
+    int64_t q_int;
+
+    if (float64_lt(a, half, s)) {
+        /* range 0.25 <= a < 0.5 (a compared against 0.5 above) */
+
+        /* a in units of 1/512 rounded down (conversion rounds to zero) */
+        /* q0 = (int)(a * 512.0);  */
+        float64 x512 = int64_to_float64(512, s);
+        q = float64_mul(x512, a, s);
+        q_int = float64_to_int64_round_to_zero(q, s);
+
+        /* reciprocal root r, evaluated at the step midpoint q0 + 0.5 */
+        /* r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0);  */
+        q = int64_to_float64(q_int, s);
+        q = float64_add(q, half, s);
+        q = float64_div(q, x512, s);
+        q = float64_sqrt(q, s);
+        q = float64_div(one, q, s);
+    } else {
+        /* range 0.5 <= a < 1.0 */
+
+        /* a in units of 1/256 rounded down (conversion rounds to zero) */
+        /* q1 = (int)(a * 256.0);  NOTE(review): shadows outer q_int */
+        q = float64_mul(x256, a, s);
+        int64_t q_int = float64_to_int64_round_to_zero(q, s);
+
+        /* reciprocal root r, evaluated at the step midpoint q1 + 0.5 */
+        /* r = 1.0 /sqrt(((double)q1 + 0.5) / 256); */
+        q = int64_to_float64(q_int, s);
+        q = float64_add(q, half, s);
+        q = float64_div(q, x256, s);
+        q = float64_sqrt(q, s);
+        q = float64_div(one, q, s);
+    }
+    /* r in units of 1/256 rounded to nearest (add 0.5, then truncate) */
+    /* s = (int)(256.0 * r + 0.5);  (pseudocode s is q_int here) */
+
+    q = float64_mul(q, x256,s );
+    q = float64_add(q, half, s);
+    q_int = float64_to_int64_round_to_zero(q, s);
+
+    /* return (double)s / 256.0;*/
+    return float64_div(int64_to_float64(q_int, s), x256, s);
+}
+
 float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
 {
-    float_status *s = &env->vfp.fp_status;
-    float32 one = int32_to_float32(1, s);
-    return float32_div(one, float32_sqrt(a, s), s);
+    float_status *s = &env->vfp.standard_fp_status;
+    int result_exp;
+    float64 f64; /* operand repacked as a double for recip_sqrt_estimate() */
+    int32_t val; /* bit pattern of a, reused for the result bits */
+    int64_t val64; /* bit pattern of the double-precision estimate */
+
+    val = float32_val(a);
+
+    if (float32_is_any_nan(a)) {
+        return float32_default_nan; /* NOTE(review): no InvalidOp on SNaN */
+    } else if (float32_is_zero(a)) { /* NOTE(review): skips denormals */
+        float_raise(float_flag_divbyzero, s);
+        return float32_set_sign(float32_infinity, float32_is_neg(a));
+    } else if (val < 0) { /* sign bit set; zero and NaN handled above */
+        float_raise(float_flag_invalid, s);
+        return float32_default_nan;
+    } else if (float32_is_infinity(a)) { /* only +infinity reaches here */
+        return float32_zero;
+    }
+
+    /* Normalize to a double-precision value between 0.25 and 1.0, preserving
+     * exponent parity. NOTE(review): ARM ARM operand<22:0> => mask 0x7fffff. */
+    if ((val & 0x800000) == 0) {
+        f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+                           | (0x3feULL << 52)
+                           | ((uint64_t)(val & 0x7ffffff) << 29));
+    } else {
+        f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+                           | (0x3fdULL << 52)
+                           | ((uint64_t)(val & 0x7ffffff) << 29));
+    }
+
+    result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2;
+
+    f64 = recip_sqrt_estimate(f64, env);
+
+    val64 = float64_val(f64);
+
+    val = ((val64 >> 63)  & 0x80000000)
+        | ((result_exp & 0xff) << 23)
+        | ((val64 >> 29)  & 0x7fffff);
+    return make_float32(val); /* sign, result_exp, top 23 fraction bits */
 }
 
 uint32_t HELPER(recpe_u32)(uint32_t a, CPUState *env)
@@ -2776,13 +2870,21 @@  uint32_t HELPER(recpe_u32)(uint32_t a, CPUState *env)
 
 uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUState *env)
 {
-    float_status *s = &env->vfp.fp_status;
-    float32 tmp;
-    tmp = int32_to_float32(a, s);
-    tmp = float32_scalbn(tmp, -32, s);
-    tmp = helper_rsqrte_f32(tmp, env);
-    tmp = float32_scalbn(tmp, 31, s);
-    return float32_to_int32(tmp, s);
+    float64 f64; /* operand repacked as a double for recip_sqrt_estimate() */
+
+    if ((a & 0xc0000000) == 0) return 0xffffffff; /* bits 31-30 == '00' */
+
+    if (a & 0x80000000) { /* bit 31 set: exponent field 0x3fe */
+        f64 = make_float64((0x3feULL << 52)
+                           | ((uint64_t)(a & 0x7fffffff) << 21));
+    } else { /* bits 31-30 == '01': exponent field 0x3fd */
+        f64 = make_float64((0x3fdULL << 52)
+                           | ((uint64_t)(a & 0x3fffffff) << 22));
+    }
+
+    f64 = recip_sqrt_estimate(f64, env);
+
+    return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
 }
 
 void HELPER(set_teecr)(CPUState *env, uint32_t val)