Message ID | 1296229866-32011-2-git-send-email-christophe.lyon@st.com |
---|---|
State | New |
Headers | show |
On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote: > From: Christophe Lyon <christophe.lyon@st.com> > > Handle corner cases where the addition of the rounding constant could > cause overflows. After applying this patch, I get the following gcc warning: CC translate.o cc1: warnings being treated as errors qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’: qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function make: *** [translate.o] Error 1 > Signed-off-by: Christophe Lyon <christophe.lyon@st.com> > --- > target-arm/neon_helper.c | 61 ++++++++++++++++++++++++++++++++++++++------- > target-arm/translate.c | 17 ++++++++++-- > 2 files changed, 65 insertions(+), 13 deletions(-) > > diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c > index bf29bbe..5971275 100644 > --- a/target-arm/neon_helper.c > +++ b/target-arm/neon_helper.c > @@ -540,6 +540,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) > return val; > } > > +/* The addition of the rounding constant may overflow, so we use an > + * intermediate 64 bits accumulator, which is really needed only when > + * dealing with 32 bits input values. */ > #define NEON_FN(dest, src1, src2) do { \ > int8_t tmp; \ > tmp = (int8_t)src2; \ > @@ -548,11 +551,12 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) > } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ > dest = src1 >> (sizeof(src1) * 8 - 1); \ > } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ > - dest = src1 >> (tmp - 1); \ > + dest = src1 >> (-tmp - 1); \ > dest++; \ > dest >>= 1; \ > } else if (tmp < 0) { \ > - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ > + int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \ > + dest = big_dest >> -tmp; \ > } else { \ > dest = src1 << tmp; \ > }} while (0) > @@ -561,6 +565,8 @@ NEON_VOP(rshl_s16, neon_s16, 2) > NEON_VOP(rshl_s32, neon_s32, 1) > #undef NEON_FN > > +/* Handling addition overflow with 64 bits inputs values is more > + * tricky than with 32 bits values. */ > uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) > { > int8_t shift = (int8_t)shiftop; > @@ -569,18 +575,37 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) > val = 0; > } else if (shift < -64) { > val >>= 63; > - } else if (shift == -63) { > + } else if (shift == -64) { > val >>= 63; > val++; > val >>= 1; > } else if (shift < 0) { > - val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; > + int64_t round = (int64_t)1 << (-1 - shift); > + /* Reduce the range as long as the addition overflows. It's > + * sufficient to check if (val+round) is < 0 and val > 0 > + * because round is > 0. */ > + while ((val > 0) && ((val + round) < 0) && round > 1) { > + shift++; > + round >>= 1; > + val >>= 1; > + } > + if ((val > 0) && (val + round) < 0) { > + /* If addition still overflows at this point, it means > + * that round==1, thus shift==-1, and also that > + * val==0x7FFFFFFFFFFFFFFF. */ > + val = 0x4000000000000000LL; > + } else { > + val = (val + round) >> -shift; > + } > } else { > val <<= shift; > } > return val; > } > > +/* The addition of the rounding constant may overflow, so we use an > + * intermediate 64 bits accumulator, which is really needed only when > + * dealing with 32 bits input values. */ > #define NEON_FN(dest, src1, src2) do { \ > int8_t tmp; \ > tmp = (int8_t)src2; \ > @@ -588,9 +613,10 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) > tmp < -(ssize_t)sizeof(src1) * 8) { \ > dest = 0; \ > } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ > - dest = src1 >> (tmp - 1); \ > + dest = src1 >> (-tmp - 1); \ > } else if (tmp < 0) { \ > - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ > + uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \ > + dest = big_dest >> -tmp; \ > } else { \ > dest = src1 << tmp; \ > }} while (0) > @@ -602,14 +628,29 @@ NEON_VOP(rshl_u32, neon_u32, 1) > uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) > { > int8_t shift = (uint8_t)shiftop; > - if (shift >= 64 || shift < 64) { > + if (shift >= 64 || shift < -64) { > val = 0; > } else if (shift == -64) { > /* Rounding a 1-bit result just preserves that bit. */ > val >>= 63; > - } if (shift < 0) { > - val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; > - val >>= -shift; > + } else if (shift < 0) { > + uint64_t round = (uint64_t)1 << (-1 - shift); > + /* Reduce the range as long as the addition overflows. It's > + * sufficient to check if (val+round) is < val > + * because val and round are > 0. */ > + while (((val + round) < val) && round > 1) { > + shift++; > + round >>= 1; > + val >>= 1; > + } > + if ((val + round) < val) { > + /* If addition still overflows at this point, it means > + * that round==1, thus shift==-1, and also that > + * val==0x&FFFFFFFFFFFFFFF. */ > + val = 0x8000000000000000LL; > + } else { > + val = (val + round) >> -shift; > + } > } else { > val <<= shift; > } > diff --git a/target-arm/translate.c b/target-arm/translate.c > index 4cf2ecd..b14fa4b 100644 > --- a/target-arm/translate.c > +++ b/target-arm/translate.c > @@ -4885,13 +4885,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) > tcg_gen_shli_i64(cpu_V0, cpu_V0, shift); > if (size < 2 || !u) { > uint64_t imm64; > - if (size == 0) { > + switch(size) { > + case 0: > imm = (0xffu >> (8 - shift)); > imm |= imm << 16; > - } else { > + break; > + case 1: > imm = 0xffff >> (16 - shift); > + break; > + case 2: > + imm = 0xffffffff >> (32 - shift); > + break; > + } > + if (size < 2) { > + imm64 = imm | (((uint64_t)imm) << 32); > + } else { > + imm64 = imm; > } > - imm64 = imm | (((uint64_t)imm) << 32); > + imm64 = ~imm64; > tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64); > } > } > -- > 1.7.2.3 > > >
On 31.01.2011 09:20, Aurelien Jarno wrote: > On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote: >> From: Christophe Lyon <christophe.lyon@st.com> >> >> Handle corner cases where the addition of the rounding constant could >> cause overflows. > > After applying this patch, I get the following gcc warning: > CC translate.o > cc1: warnings being treated as errors > qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’: > qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function > make: *** [translate.o] Error 1 > Which GCC version are you using? I don't have such a warning (using GCC-4.5.1 on x86_64). Christophe.
On Mon, Jan 31, 2011 at 10:35:30AM +0100, Christophe Lyon wrote: > On 31.01.2011 09:20, Aurelien Jarno wrote: > > On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote: > >> From: Christophe Lyon <christophe.lyon@st.com> > >> > >> Handle corner cases where the addition of the rounding constant could > >> cause overflows. > > > > After applying this patch, I get the following gcc warning: > > CC translate.o > > cc1: warnings being treated as errors > > qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’: > > qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function > > make: *** [translate.o] Error 1 > > > > Which GCC version are you using? I don't have such a warning (using GCC-4.5.1 on x86_64). > I get this error with GCC 4.3.5, GCC 4.4.5, GCC 4.5.2 and GCC 4.6.0 (r169270). This is also on x86_64.
On 31.01.2011 10:44, Aurelien Jarno wrote: > On Mon, Jan 31, 2011 at 10:35:30AM +0100, Christophe Lyon wrote: >> On 31.01.2011 09:20, Aurelien Jarno wrote: >>> On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote: >>>> From: Christophe Lyon <christophe.lyon@st.com> >>>> >>>> Handle corner cases where the addition of the rounding constant could >>>> cause overflows. >>> >>> After applying this patch, I get the following gcc warning: >>> CC translate.o >>> cc1: warnings being treated as errors >>> qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’: >>> qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function >>> make: *** [translate.o] Error 1 >>> >> >> Which GCC version are you using? I don't have such a warning (using GCC-4.5.1 on x86_64). >> > > I get this error with GCC 4.3.5, GCC 4.4.5, GCC 4.5.2 and GCC 4.6.0 > (r169270). This is also on x86_64. > Well, I can't reproduce this error :-( For the record, I configure with --target-list=arm-softmmu,arm-linux-user --disable-bluez --enable-debug --disable-sdl and point --host-cc and --cc to GCC-4.5.1. In verbose mode, I confirm that GCC is invoked with -Werror -m64 -Wstrict-prototypes -Wredundant-decls -Wall -Wundef -Wendif-labels -Wwrite-strings -Wmissing-prototypes -fno-strict-aliasing -fstack-protector-all -Wmissing-include-dirs -Wempty-body -Wnested-externs -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration -Wold-style-definition -Wtype-limits I have tried from a freshly cloned qemu.git, and I have no error. Can you send me your translate.c & translate.i (pre-processed by GCC's C-preprocesssor with -E option) Christophe.
Christophe Lyon a écrit : > On 31.01.2011 10:44, Aurelien Jarno wrote: >> On Mon, Jan 31, 2011 at 10:35:30AM +0100, Christophe Lyon wrote: >>> On 31.01.2011 09:20, Aurelien Jarno wrote: >>>> On Fri, Jan 28, 2011 at 04:50:59PM +0100, christophe.lyon@st.com wrote: >>>>> From: Christophe Lyon <christophe.lyon@st.com> >>>>> >>>>> Handle corner cases where the addition of the rounding constant could >>>>> cause overflows. >>>> After applying this patch, I get the following gcc warning: >>>> CC translate.o >>>> cc1: warnings being treated as errors >>>> qemu/target-arm/translate.c: In function ‘disas_neon_data_insn’: >>>> qemu/target-arm/translate.c:4212: error: ‘imm’ may be used uninitialized in this function >>>> make: *** [translate.o] Error 1 >>>> >>> Which GCC version are you using? I don't have such a warning (using GCC-4.5.1 on x86_64). >>> >> I get this error with GCC 4.3.5, GCC 4.4.5, GCC 4.5.2 and GCC 4.6.0 >> (r169270). This is also on x86_64. >> > > Well, I can't reproduce this error :-( > For the record, I configure with --target-list=arm-softmmu,arm-linux-user --disable-bluez --enable-debug --disable-sdl and point --host-cc and --cc to GCC-4.5.1. > It seems the problems come from here, if I add --enable-debug, I am not able to reproduce the problem anymore. I don't understand why though.
On 31 January 2011 15:59, Aurelien Jarno <aurelien@aurel32.net> wrote: > Christophe Lyon a écrit : >> Well, I can't reproduce this error :-( >> For the record, I configure with --target-list=arm-softmmu,arm-linux-user --disable-bluez --enable-debug --disable-sdl and point --host-cc and --cc to GCC-4.5.1. >> > > It seems the problems come from here, if I add --enable-debug, I am not > able to reproduce the problem anymore. I don't understand why though. --enable-debug turns off optimisation (ie does not pass -O2); a number of gcc's warnings, including this one, are only done in the dataflow analysis pass and so will not be generated unless you have optimisation enabled. -- PMM
On 31.01.2011 17:46, Peter Maydell wrote: > On 31 January 2011 15:59, Aurelien Jarno <aurelien@aurel32.net> wrote: >> It seems the problems come from here, if I add --enable-debug, I am not >> able to reproduce the problem anymore. I don't understand why though. > > --enable-debug turns off optimisation (ie does not pass -O2); a number > of gcc's warnings, including this one, are only done in the dataflow analysis > pass and so will not be generated unless you have optimisation enabled. > Indeed. I have just sent another patchset which addresses this problem. Christophe.
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index bf29bbe..5971275 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -540,6 +540,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) return val; } +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bits accumulator, which is really needed only when + * dealing with 32 bits input values. */ #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -548,11 +551,12 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ dest = src1 >> (sizeof(src1) * 8 - 1); \ } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (tmp - 1); \ + dest = src1 >> (-tmp - 1); \ dest++; \ dest >>= 1; \ } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \ + dest = big_dest >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0) @@ -561,6 +565,8 @@ NEON_VOP(rshl_s16, neon_s16, 2) NEON_VOP(rshl_s32, neon_s32, 1) #undef NEON_FN +/* Handling addition overflow with 64 bits inputs values is more + * tricky than with 32 bits values. */ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) { int8_t shift = (int8_t)shiftop; @@ -569,18 +575,37 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) val = 0; } else if (shift < -64) { val >>= 63; - } else if (shift == -63) { + } else if (shift == -64) { val >>= 63; val++; val >>= 1; } else if (shift < 0) { - val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; + int64_t round = (int64_t)1 << (-1 - shift); + /* Reduce the range as long as the addition overflows. It's + * sufficient to check if (val+round) is < 0 and val > 0 + * because round is > 0. */ + while ((val > 0) && ((val + round) < 0) && round > 1) { + shift++; + round >>= 1; + val >>= 1; + } + if ((val > 0) && (val + round) < 0) { + /* If addition still overflows at this point, it means + * that round==1, thus shift==-1, and also that + * val==0x7FFFFFFFFFFFFFFF. */ + val = 0x4000000000000000LL; + } else { + val = (val + round) >> -shift; + } } else { val <<= shift; } return val; } +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bits accumulator, which is really needed only when + * dealing with 32 bits input values. */ #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -588,9 +613,10 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) tmp < -(ssize_t)sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (tmp - 1); \ + dest = src1 >> (-tmp - 1); \ } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \ + dest = big_dest >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0) @@ -602,14 +628,29 @@ NEON_VOP(rshl_u32, neon_u32, 1) uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) { int8_t shift = (uint8_t)shiftop; - if (shift >= 64 || shift < 64) { + if (shift >= 64 || shift < -64) { val = 0; } else if (shift == -64) { /* Rounding a 1-bit result just preserves that bit. */ val >>= 63; - } if (shift < 0) { - val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; - val >>= -shift; + } else if (shift < 0) { + uint64_t round = (uint64_t)1 << (-1 - shift); + /* Reduce the range as long as the addition overflows. It's + * sufficient to check if (val+round) is < val + * because val and round are > 0. */ + while (((val + round) < val) && round > 1) { + shift++; + round >>= 1; + val >>= 1; + } + if ((val + round) < val) { + /* If addition still overflows at this point, it means + * that round==1, thus shift==-1, and also that + * val==0x&FFFFFFFFFFFFFFF. */ + val = 0x8000000000000000LL; + } else { + val = (val + round) >> -shift; + } } else { val <<= shift; } diff --git a/target-arm/translate.c b/target-arm/translate.c index 4cf2ecd..b14fa4b 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -4885,13 +4885,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_shli_i64(cpu_V0, cpu_V0, shift); if (size < 2 || !u) { uint64_t imm64; - if (size == 0) { + switch(size) { + case 0: imm = (0xffu >> (8 - shift)); imm |= imm << 16; - } else { + break; + case 1: imm = 0xffff >> (16 - shift); + break; + case 2: + imm = 0xffffffff >> (32 - shift); + break; + } + if (size < 2) { + imm64 = imm | (((uint64_t)imm) << 32); + } else { + imm64 = imm; } - imm64 = imm | (((uint64_t)imm) << 32); + imm64 = ~imm64; tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64); } }