From patchwork Mon May 10 22:23:33 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 52243 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 6E38EB7D72 for ; Tue, 11 May 2010 08:27:30 +1000 (EST) Received: from localhost ([127.0.0.1]:57536 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1OBbRf-0000jk-0v for incoming@patchwork.ozlabs.org; Mon, 10 May 2010 18:27:27 -0400 Received: from [140.186.70.92] (port=55275 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1OBbO6-0007J0-D8 for qemu-devel@nongnu.org; Mon, 10 May 2010 18:23:49 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.69) (envelope-from ) id 1OBbO1-00011o-T2 for qemu-devel@nongnu.org; Mon, 10 May 2010 18:23:46 -0400 Received: from are.twiddle.net ([75.149.56.221]:54498) by eggs.gnu.org with esmtp (Exim 4.69) (envelope-from ) id 1OBbNz-00011Q-Hz for qemu-devel@nongnu.org; Mon, 10 May 2010 18:23:41 -0400 Received: from anchor.twiddle.home (anchor.twiddle.home [172.31.0.4]) by are.twiddle.net (Postfix) with ESMTPS id EF06D263; Mon, 10 May 2010 15:23:38 -0700 (PDT) Received: from anchor.twiddle.home (anchor.twiddle.home [127.0.0.1]) by anchor.twiddle.home (8.14.4/8.14.4) with ESMTP id o4AMNcvv027795; Mon, 10 May 2010 15:23:38 -0700 Received: (from rth@localhost) by anchor.twiddle.home (8.14.4/8.14.4/Submit) id o4AMNbXK027794; Mon, 10 May 2010 15:23:37 -0700 From: Richard Henderson To: qemu-devel@nongnu.org Date: Mon, 10 May 2010 15:23:33 -0700 Message-Id: X-Mailer: git-send-email 1.7.0.1 In-Reply-To: References: In-Reply-To: References: X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6 (newer, 
2) Cc: blauwirbel@gmail.com Subject: [Qemu-devel] [PATCH 3/3] target-sparc: Inline some generation of carry for ADDX/SUBX. X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Computing carry is trivial for some inputs. By avoiding an external function call, we generate near-optimal code for the common cases of add+addx (double-word arithmetic) and cmp+addx (a setcc pattern). Signed-off-by: Richard Henderson --- target-sparc/helper.h | 2 +- target-sparc/op_helper.c | 2 +- target-sparc/translate.c | 268 +++++++++++++++++++++++++++++++++------------- 3 files changed, 196 insertions(+), 76 deletions(-) diff --git a/target-sparc/helper.h b/target-sparc/helper.h index 04c1306..6f103e7 100644 --- a/target-sparc/helper.h +++ b/target-sparc/helper.h @@ -158,6 +158,6 @@ VIS_CMPHELPER(cmpne); #undef VIS_HELPER #undef VIS_CMPHELPER DEF_HELPER_0(compute_psr, void); -DEF_HELPER_0(compute_C_icc, tl); +DEF_HELPER_0(compute_C_icc, i32); #include "def-helper.h" diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c index c36bc54..3d6177b 100644 --- a/target-sparc/op_helper.c +++ b/target-sparc/op_helper.c @@ -1314,7 +1314,7 @@ void helper_compute_psr(void) CC_OP = CC_OP_FLAGS; } -target_ulong helper_compute_C_icc(void) +uint32_t helper_compute_C_icc(void) { uint32_t ret; diff --git a/target-sparc/translate.c b/target-sparc/translate.c index ea7c71b..06f0f34 100644 --- a/target-sparc/translate.c +++ b/target-sparc/translate.c @@ -332,24 +332,130 @@ static inline void gen_op_add_cc(TCGv dst, TCGv src1, TCGv src2) tcg_gen_mov_tl(dst, cpu_cc_dst); } -static inline void gen_op_addxi_cc(TCGv dst, TCGv src1, target_long src2) +static TCGv_i32 gen_add32_carry32(void) { - gen_helper_compute_C_icc(cpu_tmp0); - 
tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_movi_tl(cpu_cc_src2, src2); - tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0); - tcg_gen_addi_tl(cpu_cc_dst, cpu_cc_dst, src2); - tcg_gen_mov_tl(dst, cpu_cc_dst); + TCGv_i32 carry_32, cc_src1_32, cc_src2_32; + + /* Carry is computed from a previous add: (dst < src) */ +#if TARGET_LONG_BITS == 64 + cc_src1_32 = tcg_temp_new_i32(); + cc_src2_32 = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(cc_src1_32, cpu_cc_dst); + tcg_gen_trunc_i64_i32(cc_src2_32, cpu_cc_src); +#else + cc_src1_32 = cpu_cc_dst; + cc_src2_32 = cpu_cc_src; +#endif + + carry_32 = tcg_temp_new_i32(); + tcg_gen_setcond_i32(TCG_COND_LTU, carry_32, cc_src1_32, cc_src2_32); + +#if TARGET_LONG_BITS == 64 + tcg_temp_free_i32(cc_src1_32); + tcg_temp_free_i32(cc_src2_32); +#endif + + return carry_32; } -static inline void gen_op_addx_cc(TCGv dst, TCGv src1, TCGv src2) +static TCGv_i32 gen_sub32_carry32(void) { - gen_helper_compute_C_icc(cpu_tmp0); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); - tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0); - tcg_gen_add_tl(cpu_cc_dst, cpu_cc_dst, cpu_cc_src2); - tcg_gen_mov_tl(dst, cpu_cc_dst); + TCGv_i32 carry_32, cc_src1_32, cc_src2_32; + + /* Carry is computed from a previous borrow: (src1 < src2) */ +#if TARGET_LONG_BITS == 64 + cc_src1_32 = tcg_temp_new_i32(); + cc_src2_32 = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(cc_src1_32, cpu_cc_src); + tcg_gen_trunc_i64_i32(cc_src2_32, cpu_cc_src2); +#else + cc_src1_32 = cpu_cc_src; + cc_src2_32 = cpu_cc_src2; +#endif + + carry_32 = tcg_temp_new_i32(); + tcg_gen_setcond_i32(TCG_COND_LTU, carry_32, cc_src1_32, cc_src2_32); + +#if TARGET_LONG_BITS == 64 + tcg_temp_free_i32(cc_src1_32); + tcg_temp_free_i32(cc_src2_32); +#endif + + return carry_32; +} + +static void gen_op_addx_int(DisasContext *dc, TCGv dst, TCGv src1, + TCGv src2, int update_cc) +{ + TCGv_i32 carry_32; + TCGv carry; + + switch (dc->cc_op) { + case CC_OP_DIV: + case CC_OP_LOGIC: + /* Carry is 
known to be zero. Fall back to plain ADD. */ + if (update_cc) { + gen_op_add_cc(dst, src1, src2); + } else { + tcg_gen_add_tl(dst, src1, src2); + } + return; + + case CC_OP_ADD: + case CC_OP_TADD: + case CC_OP_TADDTV: +#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32 + { + /* For 32-bit hosts, we can re-use the host's hardware carry + generation by using an ADD2 opcode. We discard the low + part of the output. Ideally we'd combine this operation + with the add that generated the carry in the first place. */ + TCGv dst_low = tcg_temp_new(); + tcg_gen_op6_i32(INDEX_op_add2_i32, dst_low, dst, + cpu_cc_src, src1, cpu_cc_src2, src2); + tcg_temp_free(dst_low); + goto add_done; + } +#endif + carry_32 = gen_add32_carry32(); + break; + + case CC_OP_SUB: + case CC_OP_TSUB: + case CC_OP_TSUBTV: + carry_32 = gen_sub32_carry32(); + break; + + default: + /* We need external help to produce the carry. */ + carry_32 = tcg_temp_new_i32(); + gen_helper_compute_C_icc(carry_32); + break; + } + +#if TARGET_LONG_BITS == 64 + carry = tcg_temp_new(); + tcg_gen_extu_i32_i64(carry, carry_32); +#else + carry = carry_32; +#endif + + tcg_gen_add_tl(dst, src1, src2); + tcg_gen_add_tl(dst, dst, carry); + + tcg_temp_free_i32(carry_32); +#if TARGET_LONG_BITS == 64 + tcg_temp_free(carry); +#endif + +#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32 + add_done: +#endif + if (update_cc) { + tcg_gen_mov_tl(cpu_cc_src, src1); + tcg_gen_mov_tl(cpu_cc_src2, src2); + tcg_gen_mov_tl(cpu_cc_dst, dst); + } } static inline void gen_op_tadd_cc(TCGv dst, TCGv src1, TCGv src2) @@ -415,24 +521,78 @@ static inline void gen_op_sub_cc(TCGv dst, TCGv src1, TCGv src2) tcg_gen_mov_tl(dst, cpu_cc_dst); } -static inline void gen_op_subxi_cc(TCGv dst, TCGv src1, target_long src2) +static void gen_op_subx_int(DisasContext *dc, TCGv dst, TCGv src1, + TCGv src2, int update_cc) { - gen_helper_compute_C_icc(cpu_tmp0); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_movi_tl(cpu_cc_src2, src2); - 
tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0); - tcg_gen_subi_tl(cpu_cc_dst, cpu_cc_dst, src2); - tcg_gen_mov_tl(dst, cpu_cc_dst); -} + TCGv_i32 carry_32; + TCGv carry; -static inline void gen_op_subx_cc(TCGv dst, TCGv src1, TCGv src2) -{ - gen_helper_compute_C_icc(cpu_tmp0); - tcg_gen_mov_tl(cpu_cc_src, src1); - tcg_gen_mov_tl(cpu_cc_src2, src2); - tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_tmp0); - tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_dst, cpu_cc_src2); - tcg_gen_mov_tl(dst, cpu_cc_dst); + switch (dc->cc_op) { + case CC_OP_DIV: + case CC_OP_LOGIC: + /* Carry is known to be zero. Fall back to plain SUB. */ + if (update_cc) { + gen_op_sub_cc(dst, src1, src2); + } else { + tcg_gen_sub_tl(dst, src1, src2); + } + return; + + case CC_OP_ADD: + case CC_OP_TADD: + case CC_OP_TADDTV: + carry_32 = gen_add32_carry32(); + break; + + case CC_OP_SUB: + case CC_OP_TSUB: + case CC_OP_TSUBTV: +#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32 + { + /* For 32-bit hosts, we can re-use the host's hardware carry + generation by using a SUB2 opcode. We discard the low + part of the output. Ideally we'd combine this operation + with the sub that generated the carry in the first place. */ + TCGv dst_low = tcg_temp_new(); + tcg_gen_op6_i32(INDEX_op_sub2_i32, dst_low, dst, + cpu_cc_src, src1, cpu_cc_src2, src2); + tcg_temp_free(dst_low); + goto sub_done; + } +#endif + carry_32 = gen_sub32_carry32(); + break; + + default: + /* We need external help to produce the carry. 
*/ + carry_32 = tcg_temp_new_i32(); + gen_helper_compute_C_icc(carry_32); + break; + } + +#if TARGET_LONG_BITS == 64 + carry = tcg_temp_new(); + tcg_gen_extu_i32_i64(carry, carry_32); +#else + carry = carry_32; +#endif + + tcg_gen_sub_tl(dst, src1, src2); + tcg_gen_sub_tl(dst, dst, carry); + + tcg_temp_free_i32(carry_32); +#if TARGET_LONG_BITS == 64 + tcg_temp_free(carry); +#endif + +#if TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32 + sub_done: +#endif + if (update_cc) { + tcg_gen_mov_tl(cpu_cc_src, src1); + tcg_gen_mov_tl(cpu_cc_src2, src2); + tcg_gen_mov_tl(cpu_cc_dst, dst); + } } static inline void gen_op_tsub_cc(TCGv dst, TCGv src1, TCGv src2) @@ -2950,28 +3110,8 @@ static void disas_sparc_insn(DisasContext * dc) } break; case 0x8: /* addx, V9 addc */ - if (IS_IMM) { - simm = GET_FIELDs(insn, 19, 31); - if (xop & 0x10) { - gen_op_addxi_cc(cpu_dst, cpu_src1, simm); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADDX); - dc->cc_op = CC_OP_ADDX; - } else { - gen_helper_compute_C_icc(cpu_tmp0); - tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, simm); - tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_tmp0); - } - } else { - if (xop & 0x10) { - gen_op_addx_cc(cpu_dst, cpu_src1, cpu_src2); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADDX); - dc->cc_op = CC_OP_ADDX; - } else { - gen_helper_compute_C_icc(cpu_tmp0); - tcg_gen_add_tl(cpu_tmp0, cpu_src2, cpu_tmp0); - tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_tmp0); - } - } + gen_op_addx_int(dc, cpu_dst, cpu_src1, cpu_src2, + (xop & 0x10)); break; #ifdef TARGET_SPARC64 case 0x9: /* V9 mulx */ @@ -3002,28 +3142,8 @@ static void disas_sparc_insn(DisasContext * dc) } break; case 0xc: /* subx, V9 subc */ - if (IS_IMM) { - simm = GET_FIELDs(insn, 19, 31); - if (xop & 0x10) { - gen_op_subxi_cc(cpu_dst, cpu_src1, simm); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUBX); - dc->cc_op = CC_OP_SUBX; - } else { - gen_helper_compute_C_icc(cpu_tmp0); - tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, simm); - tcg_gen_sub_tl(cpu_dst, cpu_src1, cpu_tmp0); - } - } else { - if (xop & 0x10) { - 
gen_op_subx_cc(cpu_dst, cpu_src1, cpu_src2); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUBX); - dc->cc_op = CC_OP_SUBX; - } else { - gen_helper_compute_C_icc(cpu_tmp0); - tcg_gen_add_tl(cpu_tmp0, cpu_src2, cpu_tmp0); - tcg_gen_sub_tl(cpu_dst, cpu_src1, cpu_tmp0); - } - } + gen_op_subx_int(dc, cpu_dst, cpu_src1, cpu_src2, + (xop & 0x10)); break; #ifdef TARGET_SPARC64 case 0xd: /* V9 udivx */