diff mbox

[1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA.

Message ID 1296229866-32011-2-git-send-email-christophe.lyon@st.com
State New
Headers show

Commit Message

Christophe Lyon Jan. 28, 2011, 3:50 p.m. UTC
From: Christophe Lyon <christophe.lyon@st.com>

Handle corner cases where the addition of the rounding constant could
cause overflows.

Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
 target-arm/neon_helper.c |   61 ++++++++++++++++++++++++++++++++++++++-------
 target-arm/translate.c   |   17 ++++++++++--
 2 files changed, 65 insertions(+), 13 deletions(-)

Comments

Aurelien Jarno Jan. 31, 2011, 8:20 a.m. UTC | #1
On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote:
> From: Christophe Lyon <christophe.lyon@st.com>
> 
> Handle corner cases where the addition of the rounding constant could
> cause overflows.

After applying this patch, I get the following gcc warning:
  CC    translate.o
cc1: warnings being treated as errors
qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’:
qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function
make: *** [translate.o] Error 1


> Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
> ---
>  target-arm/neon_helper.c |   61 ++++++++++++++++++++++++++++++++++++++-------
>  target-arm/translate.c   |   17 ++++++++++--
>  2 files changed, 65 insertions(+), 13 deletions(-)
> 
> diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
> index bf29bbe..5971275 100644
> --- a/target-arm/neon_helper.c
> +++ b/target-arm/neon_helper.c
> @@ -540,6 +540,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
>      return val;
>  }
>  
> +/* The addition of the rounding constant may overflow, so we use an
> + * intermediate 64 bits accumulator, which is really needed only when
> + * dealing with 32 bits input values.  */
>  #define NEON_FN(dest, src1, src2) do { \
>      int8_t tmp; \
>      tmp = (int8_t)src2; \
> @@ -548,11 +551,12 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
>      } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
>          dest = src1 >> (sizeof(src1) * 8 - 1); \
>      } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
> -        dest = src1 >> (tmp - 1); \
> +        dest = src1 >> (-tmp - 1); \
>          dest++; \
>          dest >>= 1; \
>      } else if (tmp < 0) { \
> -        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
> +        int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \
> +        dest = big_dest >> -tmp; \
>      } else { \
>          dest = src1 << tmp; \
>      }} while (0)
> @@ -561,6 +565,8 @@ NEON_VOP(rshl_s16, neon_s16, 2)
>  NEON_VOP(rshl_s32, neon_s32, 1)
>  #undef NEON_FN
>  
> +/* Handling addition overflow with 64 bits inputs values is more
> + * tricky than with 32 bits values.  */
>  uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
>  {
>      int8_t shift = (int8_t)shiftop;
> @@ -569,18 +575,37 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
>          val = 0;
>      } else if (shift < -64) {
>          val >>= 63;
> -    } else if (shift == -63) {
> +    } else if (shift == -64) {
>          val >>= 63;
>          val++;
>          val >>= 1;
>      } else if (shift < 0) {
> -        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
> +        int64_t round = (int64_t)1 << (-1 - shift);
> +        /* Reduce the range as long as the addition overflows.  It's
> +         * sufficient to check if (val+round) is < 0 and val > 0
> +         * because round is > 0.  */
> +        while ((val > 0) && ((val + round) < 0) && round > 1) {
> +            shift++;
> +            round >>= 1;
> +            val >>= 1;
> +        }
> +        if ((val > 0) && (val + round) < 0) {
> +            /* If addition still overflows at this point, it means
> +             * that round==1, thus shift==-1, and also that
> +             * val==0x7FFFFFFFFFFFFFFF.  */
> +            val = 0x4000000000000000LL;
> +        } else {
> +            val = (val + round) >> -shift;
> +        }
>      } else {
>          val <<= shift;
>      }
>      return val;
>  }
>  
> +/* The addition of the rounding constant may overflow, so we use an
> + * intermediate 64 bits accumulator, which is really needed only when
> + * dealing with 32 bits input values.  */
>  #define NEON_FN(dest, src1, src2) do { \
>      int8_t tmp; \
>      tmp = (int8_t)src2; \
> @@ -588,9 +613,10 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
>          tmp < -(ssize_t)sizeof(src1) * 8) { \
>          dest = 0; \
>      } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
> -        dest = src1 >> (tmp - 1); \
> +        dest = src1 >> (-tmp - 1); \
>      } else if (tmp < 0) { \
> -        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
> +        uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \
> +        dest = big_dest >> -tmp; \
>      } else { \
>          dest = src1 << tmp; \
>      }} while (0)
> @@ -602,14 +628,29 @@ NEON_VOP(rshl_u32, neon_u32, 1)
>  uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
>  {
>      int8_t shift = (uint8_t)shiftop;
> -    if (shift >= 64 || shift < 64) {
> +    if (shift >= 64 || shift < -64) {
>          val = 0;
>      } else if (shift == -64) {
>          /* Rounding a 1-bit result just preserves that bit.  */
>          val >>= 63;
> -    } if (shift < 0) {
> -        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
> -        val >>= -shift;
> +    } else if (shift < 0) {
> +        uint64_t round = (uint64_t)1 << (-1 - shift);
> +        /* Reduce the range as long as the addition overflows.  It's
> +         * sufficient to check if (val+round) is < val
> +         * because val and round are > 0.  */
> +        while (((val + round) < val) && round > 1) {
> +            shift++;
> +            round >>= 1;
> +            val >>= 1;
> +        }
> +        if ((val + round) < val) {
> +            /* If addition still overflows at this point, it means
> +             * that round==1, thus shift==-1, and also that
> +             * val==0x&FFFFFFFFFFFFFFF.  */
> +            val = 0x8000000000000000LL;
> +        } else {
> +            val = (val + round) >> -shift;
> +        }
>      } else {
>          val <<= shift;
>      }
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index 4cf2ecd..b14fa4b 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -4885,13 +4885,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
>                          tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
>                          if (size < 2 || !u) {
>                              uint64_t imm64;
> -                            if (size == 0) {
> +                            switch(size) {
> +                            case 0:
>                                  imm = (0xffu >> (8 - shift));
>                                  imm |= imm << 16;
> -                            } else {
> +                                break;
> +                            case 1:
>                                  imm = 0xffff >> (16 - shift);
> +                                break;
> +                            case 2:
> +                                imm = 0xffffffff >> (32 - shift);
> +                                break;
> +                            }
> +                            if (size < 2) {
> +                                imm64 = imm | (((uint64_t)imm) << 32);
> +                            } else {
> +                                imm64 = imm;
>                              }
> -                            imm64 = imm | (((uint64_t)imm) << 32);
> +                            imm64 = ~imm64;
>                              tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
>                          }
>                      }
> -- 
> 1.7.2.3
> 
> 
>
Christophe Lyon Jan. 31, 2011, 9:35 a.m. UTC | #2
On 31.01.2011 09:20, Aurelien Jarno wrote:
> On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote:
>> From: Christophe Lyon <christophe.lyon@st.com>
>>
>> Handle corner cases where the addition of the rounding constant could
>> cause overflows.
> 
> After applying this patch, I get the following gcc warning:
>   CC    translate.o
> cc1: warnings being treated as errors
> qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’:
> qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function
> make: *** [translate.o] Error 1
> 

Which GCC version are you using? I don't have such a warning (using GCC-4.5.1 on x86_64).

Christophe.
Aurelien Jarno Jan. 31, 2011, 9:44 a.m. UTC | #3
On Mon, Jan 31, 2011 at 10:35:30AM +0100, Christophe Lyon wrote:
> On 31.01.2011 09:20, Aurelien Jarno wrote:
> > On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote:
> >> From: Christophe Lyon <christophe.lyon@st.com>
> >>
> >> Handle corner cases where the addition of the rounding constant could
> >> cause overflows.
> > 
> > After applying this patch, I get the following gcc warning:
> >   CC    translate.o
> > cc1: warnings being treated as errors
> > qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’:
> > qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function
> > make: *** [translate.o] Error 1
> > 
> 
> Which GCC version are you using? I don't have such a warning (using GCC-4.5.1 on x86_64).
> 

I get this error with GCC 4.3.5, GCC 4.4.5, GCC 4.5.2 and GCC 4.6.0 
(r169270). This is also on x86_64.
Christophe Lyon Jan. 31, 2011, 2:27 p.m. UTC | #4
On 31.01.2011 10:44, Aurelien Jarno wrote:
> On Mon, Jan 31, 2011 at 10:35:30AM +0100, Christophe Lyon wrote:
>> On 31.01.2011 09:20, Aurelien Jarno wrote:
>>> On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote:
>>>> From: Christophe Lyon <christophe.lyon@st.com>
>>>>
>>>> Handle corner cases where the addition of the rounding constant could
>>>> cause overflows.
>>>
>>> After applying this patch, I get the following gcc warning:
>>>   CC    translate.o
>>> cc1: warnings being treated as errors
>>> qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’:
>>> qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function
>>> make: *** [translate.o] Error 1
>>>
>>
>> Which GCC version are you using? I don't have such a warning (using GCC-4.5.1 on x86_64).
>>
> 
> I get this error with GCC 4.3.5, GCC 4.4.5, GCC 4.5.2 and GCC 4.6.0 
> (r169270). This is also on x86_64.
> 

Well, I can't reproduce this error :-(
For the record, I configure with --target-list=arm-softmmu,arm-linux-user --disable-bluez --enable-debug --disable-sdl and point --host-cc and --cc to GCC-4.5.1.

In verbose mode, I confirm that GCC is invoked with
-Werror -m64 -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wendif-labels -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fstack-protector-all -Wmissing-include-dirs -Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration -Wold-style-definition -Wtype-limits

I have tried from a freshly cloned qemu.git, and I have no error.
Can you send me your translate.c & translate.i (pre-processed by GCC's C-preprocesssor with -E option)

Christophe.
Aurelien Jarno Jan. 31, 2011, 3:59 p.m. UTC | #5
Christophe Lyon a écrit :
> On 31.01.2011 10:44, Aurelien Jarno wrote:
>> On Mon, Jan 31, 2011 at 10:35:30AM +0100, Christophe Lyon wrote:
>>> On 31.01.2011 09:20, Aurelien Jarno wrote:
>>>> On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote:
>>>>> From: Christophe Lyon <christophe.lyon@st.com>
>>>>>
>>>>> Handle corner cases where the addition of the rounding constant could
>>>>> cause overflows.
>>>> After applying this patch, I get the following gcc warning:
>>>>   CC    translate.o
>>>> cc1: warnings being treated as errors
>>>> qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’:
>>>> qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function
>>>> make: *** [translate.o] Error 1
>>>>
>>> Which GCC version are you using? I don't have such a warning (using GCC-4.5.1 on x86_64).
>>>
>> I get this error with GCC 4.3.5, GCC 4.4.5, GCC 4.5.2 and GCC 4.6.0 
>> (r169270). This is also on x86_64.
>>
> 
> Well, I can't reproduce this error :-(
> For the record, I configure with --target-list=arm-softmmu,arm-linux-user --disable-bluez --enable-debug --disable-sdl and point --host-cc and --cc to GCC-4.5.1.
> 

It seems the problems come from here, if I add --enable-debug, I am not
able to reproduce the problem anymore. I don't understand why though.
Peter Maydell Jan. 31, 2011, 4:46 p.m. UTC | #6
On 31 January 2011 15:59, Aurelien Jarno <aurelien@aurel32.net> wrote:
> Christophe Lyon a écrit :
>> Well, I can't reproduce this error :-(
>> For the record, I configure with --target-list=arm-softmmu,arm-linux-user --disable-bluez --enable-debug --disable-sdl and point --host-cc and --cc to GCC-4.5.1.
>>
>
> It seems the problems come from here, if I add --enable-debug, I am not
> able to reproduce the problem anymore. I don't understand why though.

--enable-debug turns off optimisation (ie does not pass -O2); a number
of gcc's warnings, including this one, are only done in the dataflow analysis
pass and so will not be generated unless you have optimisation enabled.

-- PMM
Christophe Lyon Jan. 31, 2011, 6:09 p.m. UTC | #7
On 31.01.2011 17:46, Peter Maydell wrote:
> On 31 January 2011 15:59, Aurelien Jarno <aurelien@aurel32.net> wrote:
>> It seems the problems come from here, if I add --enable-debug, I am not
>> able to reproduce the problem anymore. I don't understand why though.
> 
> --enable-debug turns off optimisation (ie does not pass -O2); a number
> of gcc's warnings, including this one, are only done in the dataflow analysis
> pass and so will not be generated unless you have optimisation enabled.
> 

Indeed.

I have just sent another patchset which addresses this problem.

Christophe.
diff mbox

Patch

diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index bf29bbe..5971275 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -540,6 +540,9 @@  uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     return val;
 }
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator, which is really needed only when
+ * dealing with 32 bits input values.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
@@ -548,11 +551,12 @@  uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
     } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \
         dest = src1 >> (sizeof(src1) * 8 - 1); \
     } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
-        dest = src1 >> (tmp - 1); \
+        dest = src1 >> (-tmp - 1); \
         dest++; \
         dest >>= 1; \
     } else if (tmp < 0) { \
-        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
+        int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \
+        dest = big_dest >> -tmp; \
     } else { \
         dest = src1 << tmp; \
     }} while (0)
@@ -561,6 +565,8 @@  NEON_VOP(rshl_s16, neon_s16, 2)
 NEON_VOP(rshl_s32, neon_s32, 1)
 #undef NEON_FN
 
+/* Handling addition overflow with 64 bits inputs values is more
+ * tricky than with 32 bits values.  */
 uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
 {
     int8_t shift = (int8_t)shiftop;
@@ -569,18 +575,37 @@  uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         val = 0;
     } else if (shift < -64) {
         val >>= 63;
-    } else if (shift == -63) {
+    } else if (shift == -64) {
         val >>= 63;
         val++;
         val >>= 1;
     } else if (shift < 0) {
-        val = (val + ((int64_t)1 << (-1 - shift))) >> -shift;
+        int64_t round = (int64_t)1 << (-1 - shift);
+        /* Reduce the range as long as the addition overflows.  It's
+         * sufficient to check if (val+round) is < 0 and val > 0
+         * because round is > 0.  */
+        while ((val > 0) && ((val + round) < 0) && round > 1) {
+            shift++;
+            round >>= 1;
+            val >>= 1;
+        }
+        if ((val > 0) && (val + round) < 0) {
+            /* If addition still overflows at this point, it means
+             * that round==1, thus shift==-1, and also that
+             * val==0x7FFFFFFFFFFFFFFF.  */
+            val = 0x4000000000000000LL;
+        } else {
+            val = (val + round) >> -shift;
+        }
     } else {
         val <<= shift;
     }
     return val;
 }
 
+/* The addition of the rounding constant may overflow, so we use an
+ * intermediate 64 bits accumulator, which is really needed only when
+ * dealing with 32 bits input values.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
@@ -588,9 +613,10 @@  uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop)
         tmp < -(ssize_t)sizeof(src1) * 8) { \
         dest = 0; \
     } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \
-        dest = src1 >> (tmp - 1); \
+        dest = src1 >> (-tmp - 1); \
     } else if (tmp < 0) { \
-        dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \
+        uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \
+        dest = big_dest >> -tmp; \
     } else { \
         dest = src1 << tmp; \
     }} while (0)
@@ -602,14 +628,29 @@  NEON_VOP(rshl_u32, neon_u32, 1)
 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop)
 {
     int8_t shift = (uint8_t)shiftop;
-    if (shift >= 64 || shift < 64) {
+    if (shift >= 64 || shift < -64) {
         val = 0;
     } else if (shift == -64) {
         /* Rounding a 1-bit result just preserves that bit.  */
         val >>= 63;
-    } if (shift < 0) {
-        val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift;
-        val >>= -shift;
+    } else if (shift < 0) {
+        uint64_t round = (uint64_t)1 << (-1 - shift);
+        /* Reduce the range as long as the addition overflows.  It's
+         * sufficient to check if (val+round) is < val
+         * because val and round are > 0.  */
+        while (((val + round) < val) && round > 1) {
+            shift++;
+            round >>= 1;
+            val >>= 1;
+        }
+        if ((val + round) < val) {
+            /* If addition still overflows at this point, it means
+             * that round==1, thus shift==-1, and also that
+             * val==0x&FFFFFFFFFFFFFFF.  */
+            val = 0x8000000000000000LL;
+        } else {
+            val = (val + round) >> -shift;
+        }
     } else {
         val <<= shift;
     }
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 4cf2ecd..b14fa4b 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4885,13 +4885,24 @@  static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
                         if (size < 2 || !u) {
                             uint64_t imm64;
-                            if (size == 0) {
+                            switch(size) {
+                            case 0:
                                 imm = (0xffu >> (8 - shift));
                                 imm |= imm << 16;
-                            } else {
+                                break;
+                            case 1:
                                 imm = 0xffff >> (16 - shift);
+                                break;
+                            case 2:
+                                imm = 0xffffffff >> (32 - shift);
+                                break;
+                            }
+                            if (size < 2) {
+                                imm64 = imm | (((uint64_t)imm) << 32);
+                            } else {
+                                imm64 = imm;
                             }
-                            imm64 = imm | (((uint64_t)imm) << 32);
+                            imm64 = ~imm64;
                             tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
                         }
                     }