From patchwork Mon Jan 31 18:06:39 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Christophe Lyon X-Patchwork-Id: 81197 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 09FE910093C for ; Tue, 1 Feb 2011 05:14:27 +1100 (EST) Received: from localhost ([127.0.0.1]:52220 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PjyGd-00073m-Pd for incoming@patchwork.ozlabs.org; Mon, 31 Jan 2011 13:14:23 -0500 Received: from [140.186.70.92] (port=59699 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Pjy9R-0003ZW-NA for qemu-devel@nongnu.org; Mon, 31 Jan 2011 13:06:59 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Pjy9O-0004QB-16 for qemu-devel@nongnu.org; Mon, 31 Jan 2011 13:06:56 -0500 Received: from eu1sys200aog119.obsmtp.com ([207.126.144.147]:55412) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Pjy9N-0004Pl-Ma for qemu-devel@nongnu.org; Mon, 31 Jan 2011 13:06:53 -0500 Received: from source ([164.129.1.35]) (using TLSv1) by eu1sys200aob119.postini.com ([207.126.147.11]) with SMTP ID DSNKTUb6PC9itcsQUBg7VDhLyJ8Sceqze3Hi@postini.com; Mon, 31 Jan 2011 18:06:53 UTC Received: from zeta.dmz-eu.st.com (ns2.st.com [164.129.230.9]) by beta.dmz-eu.st.com (STMicroelectronics) with ESMTP id 67BDFB5 for ; Mon, 31 Jan 2011 18:06:52 +0000 (GMT) Received: from Webmail-eu.st.com (safex1hubcas5.st.com [10.75.90.71]) by zeta.dmz-eu.st.com (STMicroelectronics) with ESMTP id 515BE2A59 for ; Mon, 31 Jan 2011 18:06:52 +0000 (GMT) Received: from localhost.localdomain (164.129.122.40) by webmail-eu.st.com (10.75.90.13) with Microsoft SMTP Server (TLS) id 8.2.234.1; Mon, 31 Jan 
2011 19:06:51 +0100 From: To: Date: Mon, 31 Jan 2011 19:06:39 +0100 Message-ID: <1296497206-15643-2-git-send-email-christophe.lyon@st.com> X-Mailer: git-send-email 1.7.2.3 In-Reply-To: <1296497206-15643-1-git-send-email-christophe.lyon@st.com> References: <1296497206-15643-1-git-send-email-christophe.lyon@st.com> MIME-Version: 1.0 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6, seldom 2.4 (older, 4) X-Received-From: 207.126.144.147 Subject: [Qemu-devel] [PATCH 1/8] target-arm: Fixes for several shift instructions: VRSHL, VRSHR, VRSHRN, VSHLL, VRSRA. X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org From: Christophe Lyon For variants with rounding, fix cases where adding the rounding constant could overflow. For VSHLL, fix bit mask. Signed-off-by: Christophe Lyon --- target-arm/neon_helper.c | 61 ++++++++++++++++++++++++++++++++++++++------- target-arm/translate.c | 12 +++++++- 2 files changed, 61 insertions(+), 12 deletions(-) diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index fead152..6c832b4 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -451,6 +451,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) return val; } +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bits accumulator, which is really needed only when + * dealing with 32 bits input values. 
*/ #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -459,11 +462,12 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ dest = src1 >> (sizeof(src1) * 8 - 1); \ } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (tmp - 1); \ + dest = src1 >> (-tmp - 1); \ dest++; \ dest >>= 1; \ } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \ + dest = big_dest >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0) @@ -472,6 +476,8 @@ NEON_VOP(rshl_s16, neon_s16, 2) NEON_VOP(rshl_s32, neon_s32, 1) #undef NEON_FN +/* Handling addition overflow with 64-bit input values is + * trickier than with 32-bit values. */ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) { int8_t shift = (int8_t)shiftop; @@ -480,18 +486,37 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) val = 0; } else if (shift < -64) { val >>= 63; - } else if (shift == -63) { + } else if (shift == -64) { val >>= 63; val++; val >>= 1; } else if (shift < 0) { - val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; + int64_t round = (int64_t)1 << (-1 - shift); + /* Reduce the range as long as the addition overflows. It's + * sufficient to check if (val+round) is < 0 and val > 0 + * because round is > 0. */ + while ((val > 0) && ((val + round) < 0) && round > 1) { + shift++; + round >>= 1; + val >>= 1; + } + if ((val > 0) && (val + round) < 0) { + /* If addition still overflows at this point, it means + * that round==1, thus shift==-1, and also that + * val==0x7FFFFFFFFFFFFFFF. */ + val = 0x4000000000000000LL; + } else { + val = (val + round) >> -shift; + } } else { val <<= shift; } return val; } +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64-bit accumulator, which is really needed only when + * dealing with 32-bit input values. 
*/ #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -499,9 +524,10 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) tmp < -(ssize_t)sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (tmp - 1); \ + dest = src1 >> (-tmp - 1); \ } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \ + dest = big_dest >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0) @@ -513,14 +539,29 @@ NEON_VOP(rshl_u32, neon_u32, 1) uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) { int8_t shift = (uint8_t)shiftop; - if (shift >= 64 || shift < 64) { + if (shift >= 64 || shift < -64) { val = 0; } else if (shift == -64) { /* Rounding a 1-bit result just preserves that bit. */ val >>= 63; - } if (shift < 0) { - val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; - val >>= -shift; + } else if (shift < 0) { + uint64_t round = (uint64_t)1 << (-1 - shift); + /* Reduce the range as long as the addition overflows. It's + * sufficient to check if (val+round) is < val + * because val and round are > 0. */ + while (((val + round) < val) && round > 1) { + shift++; + round >>= 1; + val >>= 1; + } + if ((val + round) < val) { + /* If addition still overflows at this point, it means + * that round==1, thus shift==-1, and also that + * val==0xFFFFFFFFFFFFFFFF. 
*/ + val = 0x8000000000000000LL; + } else { + val = (val + round) >> -shift; + } } else { val <<= shift; } diff --git a/target-arm/translate.c b/target-arm/translate.c index d95133f..b44f7a1 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -4877,10 +4877,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (size == 0) { imm = (0xffu >> (8 - shift)); imm |= imm << 16; - } else { + } else if (size == 1) { imm = 0xffff >> (16 - shift); + } else { + /* size == 2 */ + imm = 0xffffffff >> (32 - shift); + } + if (size < 2) { + imm64 = imm | (((uint64_t)imm) << 32); + } else { + imm64 = imm; } - imm64 = imm | (((uint64_t)imm) << 32); + imm64 = ~imm64; tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64); } }