Message ID | 1297879186-12670-3-git-send-email-christophe.lyon@st.com |
---|---|
State | New |
Headers | show |
On 16 February 2011 17:59, <christophe.lyon@st.com> wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
>
> Now use the same algorithm as described in the ARM ARM.
>
> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>

Mostly looks good, and seems to pass random testing.

> +    float_status *s = &env->vfp.standard_fp_status;
> +    float64 one = int64_to_float64(1, s);

You don't need to create a variable for this, float64_one
will do what you want.

> +    /* q = (int)(a * 512.0) */
> +    float64 x512 = int64_to_float64(512, s);
> +    float64 q = float64_mul(x512, a, s);
> +    int64_t q_int = float64_to_int64_round_to_zero(q, s);
> +
> +    /* r = 1.0 / (((double)q + 0.5) / 512.0) */
> +    q = int64_to_float64(q_int, s);
> +    float64 half = float64_div(one, int64_to_float64(2, s), s);

...and a runtime division just to get a constant 0.5?
Better to just make_float64() on the appropriate bit
pattern, I think.

>  float32 HELPER(recpe_f32)(float32 a, CPUState *env)
>  {
> -    float_status *s = &env->vfp.fp_status;
> -    float32 one = int32_to_float32(1, s);
> -    return float32_div(one, a, s);
> +    float_status *s = &env->vfp.standard_fp_status;
> +    float64 f64;
> +    uint32_t val32 = float32_val(a);
> +
> +    int result_exp;
> +    int a_exp = (val32 & 0x7f800000) >> 23;
> +    int sign = val32 & 0x80000000;
> +
> +    if (float32_is_any_nan(a)) {
> +        return float32_default_nan;

This won't set InvalidOp if the input is a signalling NaN.

-- PMM
>> +    float_status *s = &env->vfp.standard_fp_status;
>> +    float64 one = int64_to_float64(1, s);
> You don't need to create a variable for this, float64_one
> will do what you want.

OK

>> +    /* q = (int)(a * 512.0) */
>> +    float64 x512 = int64_to_float64(512, s);
>> +    float64 q = float64_mul(x512, a, s);
>> +    int64_t q_int = float64_to_int64_round_to_zero(q, s);
>> +
>> +    /* r = 1.0 / (((double)q + 0.5) / 512.0) */
>> +    q = int64_to_float64(q_int, s);
>> +    float64 half = float64_div(one, int64_to_float64(2, s), s);
> ...and a runtime division just to get a constant 0.5?
> Better to just make_float64() on the appropriate bit
> pattern, I think.

It makes sense.
Then, what about using the right bit patterns for 512 and 256?
Actually, for these last two, I mimicked recps and rsqrts which build constants 2 and 3.
I could add another patch to address this point.

>> +    if (float32_is_any_nan(a)) {
>> +        return float32_default_nan;
> This won't set InvalidOp if the input is a signalling NaN.

Thanks for the notice, I have no means of testing those cases.

Christophe.
On 17 February 2011 17:51, Christophe Lyon <christophe.lyon@st.com> wrote:
>> ...and a runtime division just to get a constant 0.5?
>> Better to just make_float64() on the appropriate bit
>> pattern, I think.
>
> It makes sense.
> Then, what about using the right bit patterns for 512 and 256?

Not so worried about those -- it's really the division I wanted
to avoid, int-to-float isn't as expensive.

-- PMM
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 7f63a28..7751d21 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2687,13 +2687,68 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env)
 
 /* NEON helpers. */
 
-/* TODO: The architecture specifies the value that the estimate functions
-   should return. We return the exact reciprocal/root instead. */
+/* The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM.
+ */
+static float64 recip_estimate(float64 a, CPUState *env)
+{
+    float_status *s = &env->vfp.standard_fp_status;
+    float64 one = int64_to_float64(1, s);
+    /* q = (int)(a * 512.0) */
+    float64 x512 = int64_to_float64(512, s);
+    float64 q = float64_mul(x512, a, s);
+    int64_t q_int = float64_to_int64_round_to_zero(q, s);
+
+    /* r = 1.0 / (((double)q + 0.5) / 512.0) */
+    q = int64_to_float64(q_int, s);
+    float64 half = float64_div(one, int64_to_float64(2, s), s);
+    q = float64_add(q, half, s);
+    q = float64_div(q, x512, s);
+    q = float64_div(one, q, s);
+
+    /* s = (int)(256.0 * r + 0.5) */
+    float64 x256 = int64_to_float64(256, s);
+    q = float64_mul(q, x256, s);
+    q = float64_add(q, half, s);
+    q_int = float64_to_int64_round_to_zero(q, s);
+
+    /* return (double)s / 256.0 */
+    return float64_div(int64_to_float64(q_int, s), x256, s);
+}
+
 float32 HELPER(recpe_f32)(float32 a, CPUState *env)
 {
-    float_status *s = &env->vfp.fp_status;
-    float32 one = int32_to_float32(1, s);
-    return float32_div(one, a, s);
+    float_status *s = &env->vfp.standard_fp_status;
+    float64 f64;
+    uint32_t val32 = float32_val(a);
+
+    int result_exp;
+    int a_exp = (val32 & 0x7f800000) >> 23;
+    int sign = val32 & 0x80000000;
+
+    if (float32_is_any_nan(a)) {
+        return float32_default_nan;
+    } else if (float32_is_infinity(a)) {
+        return float32_set_sign(float32_zero, float32_is_neg(a));
+    } else if (float32_is_zero_or_denormal(a)) {
+        float_raise(float_flag_divbyzero, s);
+        return float32_set_sign(float32_infinity, float32_is_neg(a));
+    } else if (a_exp >= 253) {
+        float_raise(float_flag_underflow, s);
+        return float32_set_sign(float32_zero, float32_is_neg(a));
+    }
+
+    f64 = make_float64((0x3feULL << 52)
+                       | ((int64_t)(val32 & 0x7fffff) << 29));
+
+    result_exp = 253 - a_exp;
+
+    f64 = recip_estimate(f64, env);
+
+    val32 = sign
+        | ((result_exp & 0xff) << 23)
+        | ((float64_val(f64) >> 29) & 0x7fffff);
+    return make_float32(val32);
 }
 
 float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
@@ -2705,13 +2760,18 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
 
 uint32_t HELPER(recpe_u32)(uint32_t a, CPUState *env)
 {
-    float_status *s = &env->vfp.fp_status;
-    float32 tmp;
-    tmp = int32_to_float32(a, s);
-    tmp = float32_scalbn(tmp, -32, s);
-    tmp = helper_recpe_f32(tmp, env);
-    tmp = float32_scalbn(tmp, 31, s);
-    return float32_to_int32(tmp, s);
+    float64 f64;
+
+    if ((a & 0x80000000) == 0) {
+        return 0xffffffff;
+    }
+
+    f64 = make_float64((0x3feULL << 52)
+                       | ((int64_t)(a & 0x7fffffff) << 21));
+
+    f64 = recip_estimate (f64, env);
+
+    return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
 }
 
 uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUState *env)