From patchwork Wed Dec 16 23:17:10 2009
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Richard Henderson
X-Patchwork-Id: 41296
Message-Id: <32ca61571b6345f89337bfef816e2606b3995f68.1261012798.git.rth@twiddle.net>
From: Richard Henderson
Date: Wed, 16 Dec 2009 15:17:10 -0800
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 4/7] tcg-i386: Implement setcond, movcond, setcond2.
List-Id: qemu-devel.nongnu.org

An initial cut at conditional moves for the i386 backend.

Signed-off-by: Richard Henderson
---
 elf.h                 |    2 +
 tcg/i386/tcg-target.c |  280 ++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 233 insertions(+), 49 deletions(-)

diff --git a/elf.h b/elf.h
index 11674d7..c84c8ab 100644
--- a/elf.h
+++ b/elf.h
@@ -243,6 +243,8 @@ typedef struct {
 #define R_386_GOTOFF   9
 #define R_386_GOTPC    10
 #define R_386_NUM      11
+/* Not a dynamic reloc, so not included in R_386_NUM. Used in TCG. */
+#define R_386_PC8      23
 
 #define R_MIPS_NONE    0
 #define R_MIPS_16      1
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 972b102..90dbbe9 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -61,6 +61,9 @@ static void patch_reloc(uint8_t *code_ptr, int type,
     case R_386_PC32:
         *(uint32_t *)code_ptr = value - (long)code_ptr;
         break;
+    case R_386_PC8:
+        *(uint8_t *)code_ptr = value - (long)code_ptr;
+        break;
     default:
         tcg_abort();
     }
@@ -305,7 +308,8 @@ static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
         tgen_arithi(s, ARITH_ADD, reg, val, 0);
 }
 
-static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
+/* Use SMALL != 0 to force a short forward branch. */
+static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
 {
     int32_t val, val1;
     TCGLabel *l = &s->labels[label_index];
@@ -320,6 +324,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
                 tcg_out8(s, 0x70 + opc);
             tcg_out8(s, val1);
         } else {
+            assert (!small);
             if (opc == -1) {
                 tcg_out8(s, 0xe9);
                 tcg_out32(s, val - 5);
@@ -329,6 +334,15 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
                 tcg_out32(s, val - 6);
             }
         }
+    } else if (small) {
+        if (opc == -1) {
+            tcg_out8(s, 0xeb);
+        } else {
+            tcg_out8(s, 0x0f);
+            tcg_out8(s, 0x70 + opc);
+        }
+        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
+        s->code_ptr += 1;
     } else {
         if (opc == -1) {
             tcg_out8(s, 0xe9);
@@ -341,9 +355,8 @@ static void tcg_out_jxx(TCGContext *s, int opc, int label_index)
     }
 }
 
-static void tcg_out_brcond(TCGContext *s, int cond,
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index)
+static void tcg_out_cond(TCGContext *s, int cond,
+                         TCGArg arg1, TCGArg arg2, int const_arg2)
 {
     if (const_arg2) {
         if (arg2 == 0) {
@@ -355,71 +368,225 @@ static void tcg_out_brcond(TCGContext *s, int cond,
     } else {
         tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
     }
-    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index);
+}
+
+static void tcg_out_brcond(TCGContext *s, int cond,
+                           TCGArg arg1, TCGArg arg2, int const_arg2,
+                           int label_index, int small)
+{
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
 }
 
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s,
-                            const TCGArg *args, const int *const_args)
+static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
+                            const int *const_args, int small)
 {
-    int label_next;
-    label_next = gen_new_label();
-    switch(args[4]) {
+    int label_next = gen_new_label();
+    int label_dest = args[5];
+    int cond = args[4], c1, c2, c3;
+
+    switch (cond) {
     case TCG_COND_EQ:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], label_next);
-        tcg_out_brcond(s, TCG_COND_EQ, args[1], args[3], const_args[3], args[5]);
+        c1 = -1, c2 = TCG_COND_NE, c3 = TCG_COND_EQ;
         break;
     case TCG_COND_NE:
-        tcg_out_brcond(s, TCG_COND_NE, args[0], args[2], const_args[2], args[5]);
-        tcg_out_brcond(s, TCG_COND_NE, args[1], args[3], const_args[3], args[5]);
+        c1 = TCG_COND_NE, c2 = -1, c3 = TCG_COND_NE;
         break;
     case TCG_COND_LT:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
-        break;
-    case TCG_COND_LE:
-        tcg_out_brcond(s, TCG_COND_LT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
-        break;
-    case TCG_COND_GT:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
-        break;
-    case TCG_COND_GE:
-        tcg_out_brcond(s, TCG_COND_GT, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
-        break;
     case TCG_COND_LTU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LTU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LTU;
         break;
+    case TCG_COND_LE:
     case TCG_COND_LEU:
-        tcg_out_brcond(s, TCG_COND_LTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_LEU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_LEU;
         break;
+    case TCG_COND_GT:
     case TCG_COND_GTU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GTU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GTU;
         break;
+    case TCG_COND_GE:
     case TCG_COND_GEU:
-        tcg_out_brcond(s, TCG_COND_GTU, args[1], args[3], const_args[3], args[5]);
-        tcg_out_jxx(s, JCC_JNE, label_next);
-        tcg_out_brcond(s, TCG_COND_GEU, args[0], args[2], const_args[2], args[5]);
+        c1 = cond, c2 = TCG_COND_NE, c3 = TCG_COND_GEU;
         break;
     default:
-        tcg_abort();
+        tcg_abort ();
+    }
+
+    tcg_out_cond(s, cond, args[1], args[3], const_args[3]);
+    if (c1 != -1) {
+        tcg_out_jxx(s, tcg_cond_to_jcc[c1], label_dest, small);
+    }
+    if (c2 != -1) {
+        tcg_out_jxx(s, tcg_cond_to_jcc[c2], label_next, 1);
     }
+    tcg_out_brcond(s, c3, args[0], args[2], const_args[2], label_dest, small);
+
     tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
 }
 
+static void tcg_out_setcond(TCGContext *s, int cond, TCGArg arg0,
+                            TCGArg arg1, TCGArg arg2, int const_arg2)
+{
+    int use_xor = (arg0 != arg1 && (const_arg2 || arg0 != arg2));
+
+    if (use_xor)
+        tcg_out_movi(s, TCG_TYPE_I32, arg0, 0);
+    tcg_out_cond(s, cond, arg1, arg2, const_arg2);
+    tcg_out_modrm(s, 0x90 | tcg_cond_to_jcc[cond] | P_EXT, 0, arg0);
+    if (!use_xor)
+        tgen_arithi(s, ARITH_AND, arg0, 0xff, 0);
+}
+
+static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
+                             const int *const_args)
+{
+    int overlapl, overlaph;
+    TCGArg new_args[6];
+    int label_true, label_over;
+
+    overlapl = (args[0] == args[1] || (!const_args[3] && args[0] == args[3]));
+    overlaph = (args[0] == args[2] || (!const_args[4] && args[0] == args[4]));
+    memcpy(new_args, args+1, 5*sizeof(TCGArg));
+
+    if (!overlapl && !overlaph) {
+        /* ??? For EQ and NE, and output register in 'q', we could
+           implement this as cmp lows; setb %al; cmp highs; setb %ah;
+           andb %ah, %al; movzbl %al, %eax it's not clear it's worth
+           it though. */
+
+        /* When possible, clear the destination first and increment in
+           the true case. This results in smaller code than the
+           general case below. */
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+
+        label_over = gen_new_label();
+        new_args[5] = label_over;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    } else {
+        label_true = gen_new_label();
+        label_over = gen_new_label();
+
+        new_args[5] = label_true;
+        tcg_out_brcond2(s, new_args, const_args+1, 1);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
+        tcg_out_jxx(s, JCC_JMP, label_over, 1);
+        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);
+
+        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
+        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
+    }
+}
+
+static inline int have_cmov(void)
+{
+#ifdef __i686__
+    /* Compiler options say that cmov is available. */
+    return 1;
+#else
+    /* ??? Use cpuid or something and figure out what's running. */
+    return 0;
+#endif
+}
+
+static void tcg_out_movcond(TCGContext *s, const TCGArg *args,
+                            const int *const_args)
+{
+    int vtc, vfc, cond, use_cmov = 0, do_swap = 0;
+    TCGArg d, vt, vf;
+
+    d = args[0];
+    vt = args[3];
+    vf = args[4];
+    vtc = const_args[3];
+    vfc = const_args[4];
+
+    /* ??? The jcc code path below assumes that one mov insn must be skipped.
+       Rather than complicate the code below, make sure to simplify the
+       conditional move here. */
+    if (vtc == vfc && vt == vf) {
+        if (vtc)
+            tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+        else
+            tcg_out_mov(s, d, vt);
+        return;
+    }
+
+    cond = args[5];
+
+    /* If both arguments are constants, we *could* do all the funny bits that
+       gcc does with sbc, masks, etc. There's likely no point. Just use the
+       jcc version in this case. We also have to be careful about clobbering
+       inputs when trying to move constants into position. */
+
+    if (have_cmov()) {
+        use_cmov = 1;
+        if (vtc) {
+            if (vfc || d == vf)
+                use_cmov = 0;
+            else
+                do_swap = 1;
+        } else if (d == vt) {
+            if (vfc)
+                use_cmov = 0;
+            else
+                do_swap = 1;
+        }
+    }
+
+    if (!use_cmov) {
+        /* We're going to follow the lead of cmov and set D=VF first,
+           which means inverting the condition upon which we jump. */
+        cond = tcg_invert_cond(cond);
+
+        /* Don't allow the move we jump over to be a nop. */
+        do_swap = (!vtc && d == vt);
+    }
+
+    if (do_swap) {
+        TCGArg t;
+        cond = tcg_invert_cond(cond);
+        t = vf, vf = vt, vt = t;
+        t = vfc, vfc = vtc, vtc = t;
+    }
+
+    /* If possible, set D=0 before the compare, so that we can use XOR. */
+    if (vfc && vf == 0 && d != args[1] && (const_args[2] || d != args[2])) {
+        tcg_out_movi(s, TCG_TYPE_I32, d, vf);
+        vf = d, vfc = 0;
+    }
+
+    tcg_out_cond(s, cond, args[1], args[2], const_args[2]);
+
+    if (vfc) {
+        /* Force the use of "mov $0, d" to avoid clobbering flags. */
+        tcg_out8(s, 0xb8 + d);
+        tcg_out32(s, vf);
+    } else {
+        tcg_out_mov(s, d, vf);
+    }
+
+    if (use_cmov) {
+        assert (!vtc);
+        tcg_out_modrm(s, 0x40 | tcg_cond_to_jcc[cond] | P_EXT, d, vt);
+    } else {
+        int label_next = gen_new_label();
+
+        tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_next, 1);
+        if (vtc)
+            tcg_out_movi(s, TCG_TYPE_I32, d, vt);
+        else
+            tcg_out_mov(s, d, vt);
+
+        tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
+    }
+}
+
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
@@ -913,7 +1080,7 @@ static inline void tcg_out_op(TCGContext *s, int opc,
         }
         break;
     case INDEX_op_br:
-        tcg_out_jxx(s, JCC_JMP, args[0]);
+        tcg_out_jxx(s, JCC_JMP, args[0], 0);
         break;
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
@@ -1044,10 +1211,11 @@ static inline void tcg_out_op(TCGContext *s, int opc,
         tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
         break;
     case INDEX_op_brcond_i32:
-        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
+        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+                       args[3], 0);
         break;
     case INDEX_op_brcond2_i32:
-        tcg_out_brcond2(s, args, const_args);
+        tcg_out_brcond2(s, args, const_args, 0);
         break;
 
     case INDEX_op_bswap16_i32:
@@ -1080,6 +1248,16 @@ static inline void tcg_out_op(TCGContext *s, int opc,
         tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
         break;
 
+    case INDEX_op_setcond_i32:
+        tcg_out_setcond(s, args[3], args[0], args[1], args[2], const_args[2]);
+        break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args, const_args);
+        break;
+    case INDEX_op_setcond2_i32:
+        tcg_out_setcond2(s, args, const_args);
+        break;
+
     case INDEX_op_qemu_ld8u:
         tcg_out_qemu_ld(s, args, 0);
         break;
@@ -1168,6 +1346,10 @@ static const TCGTargetOpDef x86_op_defs[] = {
     { INDEX_op_ext8u_i32, { "r", "q"} },
     { INDEX_op_ext16u_i32, { "r", "r"} },
 
+    { INDEX_op_setcond_i32, { "q", "r", "ri" } },
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "ri", "ri" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
+
 #if TARGET_LONG_BITS == 32
     { INDEX_op_qemu_ld8u, { "r", "L" } },
     { INDEX_op_qemu_ld8s, { "r", "L" } },
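
As posted, have_cmov() only trusts the compiler: it returns 1 when __i686__ is
predefined and otherwise returns 0, so builds without -march=i686 (or better)
never set use_cmov and always fall back to the jcc path in tcg_out_movcond().
The "??? Use cpuid" comment leaves runtime detection open; the standalone
sketch below (not part of this patch, and not necessarily how QEMU would wire
it up) shows one way to answer it with GCC's <cpuid.h>, testing the CMOV
feature flag in CPUID leaf 1, EDX bit 15.  The detect_cmov() name is purely
illustrative.

/* Illustrative sketch only -- not part of the patch above.  Runtime CMOV
   detection via CPUID, along the lines hinted at by the "??? Use cpuid"
   comment in have_cmov().  Requires GCC/clang's <cpuid.h> on an x86 host. */
#include <stdio.h>
#include <cpuid.h>

static int detect_cmov(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* CPUID leaf 1: EDX bit 15 advertises the CMOVcc/FCMOVcc instructions.
       __get_cpuid() returns 0 if the requested leaf is not supported. */
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        return 0;
    }
    return (edx >> 15) & 1;
}

int main(void)
{
    printf("cmov available: %d\n", detect_cmov());
    return 0;
}

A check like this, substituted into have_cmov(), would let non-i686 builds
still emit cmov when the host supports it; the sketch only demonstrates the
detection itself.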