diff mbox

[1/6] target-arm: Fix rounding constant addition for Neon shift instructions.

Message ID 1297437062-6118-2-git-send-email-christophe.lyon@st.com
State New
Headers show

Commit Message

Christophe Lyon Feb. 11, 2011, 3:10 p.m. UTC
From: Christophe Lyon <christophe.lyon@st.com>

Handle cases where adding the rounding constant could overflow in Neon
shift instructions: VRSHR, VRSRA, VQRSHRN, VQRSHRUN, VRSHRN.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |  149 ++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 137 insertions(+), 12 deletions(-)

Comments

Peter Maydell Feb. 14, 2011, 6:12 p.m. UTC | #1
On 11 February 2011 15:10,  <christophe.lyon@st.com> wrote:
> +uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
> +{
> +    int32_t dest;
> +    int32_t val = (int32_t)valop;
> +    int8_t shift = (int8_t)shiftop;
> +    if (shift >= 32) {
> +        dest = 0;
> +    } else if (shift < -32) {
> +        dest = val >> 31;

This is the wrong answer: large rounding right shifts give zero.

> +    } else if (shift == -32) {
> +        dest = val >> 31;
> +        dest++;
> +        dest >>= 1;

These three lines will always result in dest becoming
0 regardless of the input value.

I'm going to post a patch which fixes these as part
of getting the answers right for VRSHL by large shift
counts in general.

-- PMM
Christophe Lyon Feb. 15, 2011, 10:07 a.m. UTC | #2
On 14.02.2011 19:12, Peter Maydell wrote:
> On 11 February 2011 15:10,  <christophe.lyon@st.com> wrote:
>> +uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
>> +{
>> +    int32_t dest;
>> +    int32_t val = (int32_t)valop;
>> +    int8_t shift = (int8_t)shiftop;
>> +    if (shift >= 32) {
>> +        dest = 0;
>> +    } else if (shift < -32) {
>> +        dest = val >> 31;
> 
> This is the wrong answer: large rounding right shifts give zero.
> 
>> +    } else if (shift == -32) {
>> +        dest = val >> 31;
>> +        dest++;
>> +        dest >>= 1;
> 
> These three lines will always result in dest becoming
> 0 regardless of the input value.
> 

You are right. Actually, I just intended to fix the case where
-32 < shift < 0, and merely re-instantiated the preceding macro with a known size of 32.

Your comments also apply to the 8- and 16-bit variants in that macro.

I am too respectful of existing code :-)

Christophe.
diff mbox

Patch

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index cf82072..3f1f3d4 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -558,9 +558,34 @@  uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP(rshl_s8, neon_s8, 4)
 NEON_VOP(rshl_s16, neon_s16, 2)
-NEON_VOP(rshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* Signed 32-bit rounding shift: a negative count shifts right with
+ * rounding.  The rounding constant is added in a 64-bit accumulator
+ * because the 32-bit sum could overflow.  */
+uint32_t HELPER(neon_rshl_s32)(uint32_t valop, uint32_t shiftop)
+{
+    int32_t dest;
+    int32_t val = (int32_t)valop;
+    /* Only the low byte of shiftop is used, as a signed count.  */
+    int8_t shift = (int8_t)shiftop;
+    if (shift >= 32 || shift <= -32) {
+        /* Left shifts by 32 or more push every bit out.  For right
+         * shifts by 32 or more the rounded result is also zero:
+         * val + 2^(n-1) lies in [0, 2^n) for any n >= 32.  */
+        dest = 0;
+    } else if (shift < 0) {
+        int64_t big_dest = (int64_t)val + ((int64_t)1 << (-1 - shift));
+        dest = big_dest >> -shift;
+    } else {
+        /* Shift as unsigned: left-shifting a negative int is undefined.  */
+        dest = (uint32_t)val << shift;
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64-bit input values is
+ * trickier than with 32-bit values.  */
 uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
@@ -574,7 +599,16 @@  uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         val++;
         val >>= 1;
     } else if (shift < 0) {
-        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == INT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x4000000000000000LL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
         val <<= shift;
     }
@@ -596,9 +630,29 @@  uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP(rshl_u8, neon_u8, 4)
 NEON_VOP(rshl_u16, neon_u16, 2)
-NEON_VOP(rshl_u32, neon_u32, 1)
 #undef NEON_FN
 
+/* Unsigned 32-bit rounding shift.  The rounding constant is added in
+ * 64 bits so that the sum cannot wrap.  */
+uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shiftop)
+{
+    int8_t shift = (int8_t)shiftop;
+    if (shift >= 32 || shift < -32) {
+        /* Counts outside -32..31 always produce zero.  */
+        return 0;
+    }
+    if (shift == -32) {
+        return val >> 31;
+    }
+    if (shift >= 0) {
+        return val << shift;
+    }
+    uint64_t rounded = (uint64_t)val + (1 << (-1 - shift));
+    return rounded >> -shift;
+}
+
+/* Handling addition overflow with 64-bit input values is
+ * trickier than with 32-bit values.  */
 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
@@ -607,9 +661,17 @@  uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
     } else if (shift == -64) {
         /* Rounding a 1-bit result just preserves that bit.  */
         val >>= 63;
-    } if (shift < 0) {
-        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
-        val >>= -shift;
+    } else if (shift < 0) {
+        val >>= (-shift - 1);
+        if (val == UINT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x8000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
         val <<= shift;
     }
@@ -784,14 +846,43 @@  uint64_t HELPER(neon_qshlu_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
     }} while (0)
 NEON_VOP_ENV(qrshl_u8, neon_u8, 4)
 NEON_VOP_ENV(qrshl_u16, neon_u16, 2)
-NEON_VOP_ENV(qrshl_u32, neon_u32, 1)
 #undef NEON_FN
 
+/* Unsigned saturating rounding shift.  Rounding is done in a 64-bit
+ * accumulator; counts outside -32..31 never reach a shift operator.  */
+uint32_t HELPER(neon_qrshl_u32)(CPUState *env, uint32_t val, uint32_t shiftop)
+{
+    uint32_t dest = 0;
+    int8_t shift = (int8_t)shiftop;
+    if (shift < 0 && shift >= -32) {
+        /* Larger right shifts round to 0 and keep the initial dest.  */
+        dest = ((uint64_t)val + ((uint64_t)1 << (-1 - shift))) >> -shift;
+    } else if (shift >= 0 && shift < 32 && ((val << shift) >> shift) == val) {
+        dest = val << shift;
+    } else if (shift >= 0 && val != 0) {
+        /* Bits were lost (or the count is 32 or more): saturate.  */
+        SET_QC();
+        dest = ~0;
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64-bit input values is
+ * trickier than with 32-bit values.  */
 uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
     if (shift < 0) {
-        val = (val + (1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == UINT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x8000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else { \
         uint64_t tmp = val;
         val <<= shift;
@@ -817,22 +908,56 @@  uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop)
     }} while (0)
 NEON_VOP_ENV(qrshl_s8, neon_s8, 4)
 NEON_VOP_ENV(qrshl_s16, neon_s16, 2)
-NEON_VOP_ENV(qrshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* Signed saturating rounding shift.  Rounding is done in a 64-bit
+ * accumulator; counts outside -31..31 never reach a shift operator.  */
+uint32_t HELPER(neon_qrshl_s32)(CPUState *env, uint32_t valop, uint32_t shiftop)
+{
+    int32_t dest = 0;
+    int32_t val = (int32_t)valop;
+    int8_t shift = (int8_t)shiftop;
+    if (shift < 0 && shift > -32) {
+        /* Right shifts by 32 or more round to 0 and keep the initial dest.  */
+        dest = ((int64_t)val + ((int64_t)1 << (-1 - shift))) >> -shift;
+    } else if (shift >= 0 && shift < 32
+               && (int32_t)((uint32_t)val << shift) >> shift == val) {
+        dest = (uint32_t)val << shift;
+    } else if (shift >= 0 && val != 0) {
+        SET_QC();
+        dest = val > 0 ? INT32_MAX : INT32_MIN;
+    }
+    return dest;
+}
+
+/* Handling addition overflow with 64-bit input values is
+ * trickier than with 32-bit values.  */
 uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
     int64_t val = valop;
 
     if (shift < 0) {
-        val = (val + (1 << (-1 - shift))) >> -shift;
+        val >>= (-shift - 1);
+        if (val == INT64_MAX) {
+            /* In this case, it means that the rounding constant is 1,
+             * and the addition would overflow. Return the actual
+             * result directly.  */
+            val = 0x4000000000000000ULL;
+        } else {
+            val++;
+            val >>= 1;
+        }
     } else {
-        int64_t tmp = val;;
+        int64_t tmp = val;
         val <<= shift;
         if ((val >> shift) != tmp) {
             SET_QC();
-            val = tmp >> 31;
+            if (tmp < 0) {
+                val = INT64_MIN;
+            } else {
+                val = INT64_MAX;
+            }
         }
     }
     return val;