Patchwork [1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA.

login
register
mail settings
Submitter Christophe LYON
Date Jan. 31, 2011, 6:06 p.m.
Message ID <1296497206-15643-2-git-send-email-christophe.lyon@st.com>
Download mbox | patch
Permalink /patch/81197/
State New
Headers show

Comments

Christophe LYON - Jan. 31, 2011, 6:06 p.m.
From: Christophe Lyon <christophe.lyon@st.com>

For variants with rounding, fix cases where adding the rounding
constant could overflow.

For VSHLL, fix bit mask.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |   61 ++++++++++++++++++++++++++++++++++++++-------
 target-arm/translate.c   |   12 +++++++-
 2 files changed, 61 insertions(+), 12 deletions(-)
Peter Maydell - Feb. 7, 2011, 3:57 p.m.
On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
> For variants with rounding, fix cases where adding the rounding
> constant could overflow.
>
> For VSHLL, fix bit mask.

These two things are completely distinct -- please put them
in separate patches.

-- PMM
Christophe LYON - Feb. 9, 2011, 12:16 p.m.
On 07.02.2011 16:57, Peter Maydell wrote:
> On 31 January 2011 18:06,  <christophe.lyon@st.com> wrote:
>> For variants with rounding, fix cases where adding the rounding
>> constant could overflow.
>>
>> For VSHLL, fix bit mask.
> 
> These two things are completely distinct -- please put them
> in separate patches.
> 

OK, I am going to re-submit this whole series of patches.

Thanks

Christophe.

Patch

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index fead152..6c832b4 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -451,6 +451,9 @@  uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     return val;
 }
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator, which is really needed only when
+ * dealing with 32-bit input values.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
@@ -459,11 +462,12 @@  uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
         dest = src1 >> (sizeof(src1) * 8 - 1); \
     } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
-        dest = src1 >> (tmp - 1); \
+        dest = src1 >> (-tmp - 1); \
         dest++; \
         dest >>= 1; \
     } else if (tmp < 0) { \
-        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
+        int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \
+        dest = big_dest >> -tmp; \
     } else { \
         dest = src1 << tmp; \
     }} while (0)
@@ -472,6 +476,8 @@  NEON_VOP(rshl_s16, neon_s16, 2)
 NEON_VOP(rshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* Handling addition overflow with 64-bit input values is trickier
+ * than with 32-bit values.  */
 uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
@@ -480,18 +486,37 @@  uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         val = 0;
     } else if (shift < -64) {
         val >>= 63;
-    } else if (shift == -63) {
+    } else if (shift == -64) {
         val >>= 63;
         val++;
         val >>= 1;
     } else if (shift < 0) {
-        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
+        int64_t round = (int64_t)1 << (-1 - shift);
+        /* Reduce the range as long as the addition overflows.  It's
+         * sufficient to check if (val+round) is < 0 and val > 0
+         * because round is > 0.  */
+        while ((val > 0) && ((val + round) < 0) && round > 1) {
+            shift++;
+            round >>= 1;
+            val >>= 1;
+        }
+        if ((val > 0) && (val + round) < 0) {
+            /* If addition still overflows at this point, it means
+             * that round==1, thus shift==-1, and also that
+             * val==0x7FFFFFFFFFFFFFFF.  */
+            val = 0x4000000000000000LL;
+        } else {
+            val = (val + round) >> -shift;
+        }
     } else {
         val <<= shift;
     }
     return val;
 }
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64-bit accumulator, which is really needed only when
+ * dealing with 32-bit input values.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
@@ -499,9 +524,10 @@  uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         tmp < -(ssize_t)sizeof(src1) * 8) { \
         dest = 0; \
     } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
-        dest = src1 >> (tmp - 1); \
+        dest = src1 >> (-tmp - 1); \
     } else if (tmp < 0) { \
-        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
+        uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \
+        dest = big_dest >> -tmp; \
     } else { \
         dest = src1 << tmp; \
     }} while (0)
@@ -513,14 +539,29 @@  NEON_VOP(rshl_u32, neon_u32, 1)
 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
-    if (shift >= 64 || shift < 64) {
+    if (shift >= 64 || shift < -64) {
         val = 0;
     } else if (shift == -64) {
         /* Rounding a 1-bit result just preserves that bit.  */
         val >>= 63;
-    } if (shift < 0) {
-        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
-        val >>= -shift;
+    } else if (shift < 0) {
+        uint64_t round = (uint64_t)1 << (-1 - shift);
+        /* Reduce the range as long as the addition overflows.  It's
+         * sufficient to check if (val+round) is < val
+         * because val and round are > 0.  */
+        while (((val + round) < val) && round > 1) {
+            shift++;
+            round >>= 1;
+            val >>= 1;
+        }
+        if ((val + round) < val) {
+            /* If addition still overflows at this point, it means
+             * that round==1, thus shift==-1, and also that
+             * val==0xFFFFFFFFFFFFFFFF.  */
+            val = 0x8000000000000000LL;
+        } else {
+            val = (val + round) >> -shift;
+        }
     } else {
         val <<= shift;
     }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index d95133f..b44f7a1 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4877,10 +4877,18 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                             if (size == 0) {
                                 imm = (0xffu >> (8 - shift));
                                 imm |= imm << 16;
-                            } else {
+                            } else if (size == 1) {
                                 imm = 0xffff >> (16 - shift);
+                            } else {
+                                /* size == 2 */
+                                imm = 0xffffffff >> (32 - shift);
+                            }
+                            if (size < 2) {
+                                imm64 = imm | (((uint64_t)imm) << 32);
+                            } else {
+                                imm64 = imm;
                             }
-                            imm64 = imm | (((uint64_t)imm) << 32);
+                            imm64 = ~imm64;
                             tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
                         }
                     }