From patchwork Wed Dec 16 23:28:33 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 41297 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 25134B6F15 for ; Thu, 17 Dec 2009 12:50:05 +1100 (EST) Received: from localhost ([127.0.0.1]:34248 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1NL5VB-0002Ry-MS for incoming@patchwork.ozlabs.org; Wed, 16 Dec 2009 20:50:01 -0500 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1NL5Mk-0006HL-5z for qemu-devel@nongnu.org; Wed, 16 Dec 2009 20:41:18 -0500 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1NL5Mj-0006Fk-HL for qemu-devel@nongnu.org; Wed, 16 Dec 2009 20:41:17 -0500 Received: from [199.232.76.173] (port=53949 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1NL5Mi-0006FX-UV for qemu-devel@nongnu.org; Wed, 16 Dec 2009 20:41:17 -0500 Received: from are.twiddle.net ([75.149.56.221]:37542) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1NL5Mi-0001tP-Cv for qemu-devel@nongnu.org; Wed, 16 Dec 2009 20:41:16 -0500 Received: by are.twiddle.net (Postfix, from userid 5000) id 05BFAD8A; Wed, 16 Dec 2009 17:41:16 -0800 (PST) Message-Id: <7f2f6a0d55d605bdef296aa6a8cf3c57951bbbdb.1261012798.git.rth@twiddle.net> In-Reply-To: References: From: Richard Henderson Date: Wed, 16 Dec 2009 15:28:33 -0800 To: qemu-devel@nongnu.org X-detected-operating-system: by monty-python.gnu.org: GNU/Linux 2.6 (newer, 2) Subject: [Qemu-devel] [PATCH 6/7] target-i386: Use setcond and movcond. 
X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Splits out the condition code handling into a new function that's directly callable from setcc and cmov expanders. From there we can directly emit the operation we care about. Signed-off-by: Richard Henderson --- target-i386/translate.c | 192 +++++++++++++++++++++------------------------- tcg/tcg-op.h | 4 + 2 files changed, 92 insertions(+), 104 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 64bc0a3..b29141b 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -888,64 +888,28 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) } } -/* return true if setcc_slow is not needed (WARNING: must be kept in - sync with gen_jcc1) */ -static int is_fast_jcc_case(DisasContext *s, int b) +typedef struct { - int jcc_op; - jcc_op = (b >> 1) & 7; - switch(s->cc_op) { - /* we optimize the cmp/jcc case */ - case CC_OP_SUBB: - case CC_OP_SUBW: - case CC_OP_SUBL: - case CC_OP_SUBQ: - if (jcc_op == JCC_O || jcc_op == JCC_P) - goto slow_jcc; - break; - - /* some jumps are easy to compute */ - case CC_OP_ADDB: - case CC_OP_ADDW: - case CC_OP_ADDL: - case CC_OP_ADDQ: - - case CC_OP_LOGICB: - case CC_OP_LOGICW: - case CC_OP_LOGICL: - case CC_OP_LOGICQ: - - case CC_OP_INCB: - case CC_OP_INCW: - case CC_OP_INCL: - case CC_OP_INCQ: - - case CC_OP_DECB: - case CC_OP_DECW: - case CC_OP_DECL: - case CC_OP_DECQ: - - case CC_OP_SHLB: - case CC_OP_SHLW: - case CC_OP_SHLL: - case CC_OP_SHLQ: - if (jcc_op != JCC_Z && jcc_op != JCC_S) - goto slow_jcc; - break; - default: - slow_jcc: - return 0; - } - return 1; -} + TCGCond cond; + _Bool op1_z; + _Bool slow_T0; + TCGv op0, op1; +} jcc2_result; -/* generate a 
conditional jump to label 'l1' according to jump opcode - value 'b'. In the fast case, T0 is guaranted not to be used. */ -static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) +/* Evaluate a conditional according to jump opcode value 'b'. + In the fast case, T0 is guaranteed not to be used. */ +static inline jcc2_result gen_jcc2(DisasContext *s, int cc_op, int b) { + jcc2_result ret; int inv, jcc_op, size, cond; TCGv t0; + ret.cond = -1; + ret.op1_z = 0; + ret.slow_T0 = 0; + TCGV_UNUSED(ret.op0); + TCGV_UNUSED(ret.op1); + inv = b & 1; jcc_op = (b >> 1) & 7; @@ -979,31 +943,37 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) t0 = cpu_cc_dst; break; } - tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1); + ret.cond = inv ? TCG_COND_NE : TCG_COND_EQ; + ret.op0 = t0; + ret.op1_z = 1; break; case JCC_S: fast_jcc_s: switch(size) { case 0: tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, - 0, l1); + ret.cond = inv ? TCG_COND_EQ : TCG_COND_NE; + ret.op0 = cpu_tmp0; + ret.op1_z = 1; break; case 1: tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, - 0, l1); + ret.cond = inv ? TCG_COND_EQ : TCG_COND_NE; + ret.op0 = cpu_tmp0; + ret.op1_z = 1; break; #ifdef TARGET_X86_64 case 2: tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, - 0, l1); + ret.cond = inv ? TCG_COND_EQ : TCG_COND_NE; + ret.op0 = cpu_tmp0; + ret.op1_z = 1; break; #endif default: - tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, - 0, l1); + ret.cond = inv ? 
TCG_COND_GE : TCG_COND_LT; + ret.op0 = cpu_cc_dst; + ret.op1_z = 1; break; } break; @@ -1037,7 +1007,9 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) t0 = cpu_cc_src; break; } - tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); + ret.cond = cond; + ret.op0 = cpu_tmp4; + ret.op1 = t0; break; case JCC_L: @@ -1069,7 +1041,9 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) t0 = cpu_cc_src; break; } - tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); + ret.cond = cond; + ret.op0 = cpu_tmp4; + ret.op1 = t0; break; default: @@ -1131,12 +1105,28 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) default: slow_jcc: gen_setcc_slow_T0(s, jcc_op); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, - cpu_T[0], 0, l1); - break; + ret.cond = inv ? TCG_COND_EQ : TCG_COND_NE; + ret.op0 = cpu_T[0]; + ret.op1_z = 1; + ret.slow_T0 = 1; + break; } + + return ret; } +/* Generate a conditional jump to label 'l1' according to jump opcode + value 'b'. In the fast case, T0 is guaranteed not to be used. 
*/ +static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) +{ + jcc2_result cmp = gen_jcc2(s, cc_op, b); + if (cmp.op1_z) + tcg_gen_brcondi_tl(cmp.cond, cmp.op0, 0, l1); + else + tcg_gen_brcond_tl(cmp.cond, cmp.op0, cmp.op1, l1); +} + + /* XXX: does not work with gdbstub "ice" single step - not a serious problem */ static int gen_jz_ecx_string(DisasContext *s, target_ulong next_eip) @@ -2342,30 +2332,24 @@ static inline void gen_jcc(DisasContext *s, int b, static void gen_setcc(DisasContext *s, int b) { - int inv, jcc_op, l1; - TCGv t0; + jcc2_result cmp = gen_jcc2(s, s->cc_op, b); - if (is_fast_jcc_case(s, b)) { - /* nominal case: we use a jump */ - /* XXX: make it faster by adding new instructions in TCG */ - t0 = tcg_temp_local_new(); - tcg_gen_movi_tl(t0, 0); - l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, b ^ 1, l1); - tcg_gen_movi_tl(t0, 1); - gen_set_label(l1); - tcg_gen_mov_tl(cpu_T[0], t0); - tcg_temp_free(t0); - } else { - /* slow case: it is more efficient not to generate a jump, - although it is questionnable whether this optimization is - worth to */ - inv = b & 1; - jcc_op = (b >> 1) & 7; - gen_setcc_slow_T0(s, jcc_op); - if (inv) { + if (cmp.slow_T0) { + /* Slow case: Note that we've already called gen_setcc_slow_T0 + inside gen_jcc2, which resulted in a boolean value being placed + into cpu_T[0]. Note also that EQ equates to inversion. */ + if (cmp.cond == TCG_COND_EQ) { tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1); } + } else { + /* Fast case: We've computed some values that need to be + compared directly. */ + TCGv op1 = cmp.op1; + if (cmp.op1_z) + op1 = tcg_const_tl(0); + tcg_gen_setcond_tl(cmp.cond, cpu_T[0], cmp.op0, op1); + if (cmp.op1_z) + tcg_temp_free(op1); } } @@ -6335,14 +6319,14 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) break; case 0x140 ... 
0x14f: /* cmov Gv, Ev */ { - int l1; - TCGv t0; + TCGv t0, op1; + jcc2_result cmp; ot = dflag + OT_WORD; modrm = ldub_code(s->pc++); reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; - t0 = tcg_temp_local_new(); + t0 = tcg_temp_new(); if (mod != 3) { gen_lea_modrm(s, modrm, ®_addr, &offset_addr); gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); @@ -6350,23 +6334,23 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) rm = (modrm & 7) | REX_B(s); gen_op_mov_v_reg(ot, t0, rm); } + + cmp = gen_jcc2(s, s->cc_op, b); + + op1 = cmp.op1; + if (cmp.op1_z) + op1 = tcg_const_tl(0); + tcg_gen_movcond_tl(cmp.cond, cpu_regs[reg], + cmp.op0, op1, t0, cpu_regs[reg]); + if (cmp.op1_z) + tcg_temp_free(op1); + tcg_temp_free(t0); + #ifdef TARGET_X86_64 if (ot == OT_LONG) { - /* XXX: specific Intel behaviour ? */ - l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, b ^ 1, l1); - tcg_gen_mov_tl(cpu_regs[reg], t0); - gen_set_label(l1); tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]); - } else -#endif - { - l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, b ^ 1, l1); - gen_op_mov_reg_v(ot, reg, t0); - gen_set_label(l1); } - tcg_temp_free(t0); +#endif } break; diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 4d0fec0..4db44b6 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -2154,6 +2154,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_sari_tl tcg_gen_sari_i64 #define tcg_gen_brcond_tl tcg_gen_brcond_i64 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i64 +#define tcg_gen_setcond_tl tcg_gen_setcond_i64 +#define tcg_gen_movcond_tl tcg_gen_movcond_i64 #define tcg_gen_mul_tl tcg_gen_mul_i64 #define tcg_gen_muli_tl tcg_gen_muli_i64 #define tcg_gen_div_tl tcg_gen_div_i64 @@ -2224,6 +2226,8 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_sari_tl tcg_gen_sari_i32 #define tcg_gen_brcond_tl tcg_gen_brcond_i32 #define tcg_gen_brcondi_tl tcg_gen_brcondi_i32 +#define tcg_gen_setcond_tl 
tcg_gen_setcond_i32 +#define tcg_gen_movcond_tl tcg_gen_movcond_i32 #define tcg_gen_mul_tl tcg_gen_mul_i32 #define tcg_gen_muli_tl tcg_gen_muli_i32 #define tcg_gen_div_tl tcg_gen_div_i32