From patchwork Tue Feb 19 17:40:28 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 221871 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 5D2442C0099 for ; Wed, 20 Feb 2013 07:14:36 +1100 (EST) Received: from localhost ([::1]:39883 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1U7taE-0000eq-64 for incoming@patchwork.ozlabs.org; Tue, 19 Feb 2013 15:14:34 -0500 Received: from eggs.gnu.org ([208.118.235.92]:44611) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1U7rD0-00071W-Bp for qemu-devel@nongnu.org; Tue, 19 Feb 2013 12:42:33 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1U7rCt-0008MN-JJ for qemu-devel@nongnu.org; Tue, 19 Feb 2013 12:42:26 -0500 Received: from mail-pa0-f43.google.com ([209.85.220.43]:47943) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1U7rCt-0008Lr-58 for qemu-devel@nongnu.org; Tue, 19 Feb 2013 12:42:19 -0500 Received: by mail-pa0-f43.google.com with SMTP id bh2so3564352pad.16 for ; Tue, 19 Feb 2013 09:42:16 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=x-received:sender:from:to:cc:subject:date:message-id:x-mailer :in-reply-to:references; bh=EaB3P46b5wNogC8tzSxG49lI/1bBh+i1Khdw3rn3D3o=; b=knOxsQBYYM8Ap5OSp+wxUq0Cm4g7d24sf/sTqaBuI00ZIXcemciffJeWXbe6sSdnLs IEvtVtJLCDXdoVWY7VQqSKV6AYQN5Pk17T8xzxkktoUy0nFJ4QT/DhP6EGJD6D/oIVMc IOlcYDnuT/3Dg4fH6ldi7Ex3kwkrSbVm2cEL+CD9/RZDidUl/E1S0j+H9sVTfw9Fhc6c xOo1PhHQeIwXPtZEiTN1GI/KQVwIMvg4pI5r5Zf9k9v6toX89M2L4ANVpUb7BIgZTqD/ Zsm0ET7/wNhHmOQhsVdnwGtMaHWghdQvfxhVJmESNXVXoHaXHP/VPfc41B5gZISSTPMy zR3g== X-Received: by 10.68.127.202 with SMTP id ni10mr42910127pbb.124.1361295736593; Tue, 19 Feb 2013 09:42:16 -0800 (PST) Received: from anchor.twiddle.net (50-194-63-110-static.hfc.comcastbusiness.net. [50.194.63.110]) by mx.google.com with ESMTPS id 1sm18659295pbg.18.2013.02.19.09.42.14 (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Tue, 19 Feb 2013 09:42:15 -0800 (PST) From: Richard Henderson To: qemu-devel@nongnu.org Date: Tue, 19 Feb 2013 09:40:28 -0800 Message-Id: <1361295631-21316-55-git-send-email-rth@twiddle.net> X-Mailer: git-send-email 1.8.1.2 In-Reply-To: <1361295631-21316-1-git-send-email-rth@twiddle.net> References: <1361295631-21316-1-git-send-email-rth@twiddle.net> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 209.85.220.43 Cc: blauwirbel@gmail.com, pbonzini@redhat.com, afaerber@suse.de, aurelien@aurel32.net Subject: [Qemu-devel] [PATCH 54/57] target-i386: Implement ADX extension X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Signed-off-by: Richard Henderson --- target-i386/cc_helper.c | 30 +++++++++++++ target-i386/cpu.c | 4 +- target-i386/cpu.h | 4 ++ target-i386/helper.c | 4 ++ target-i386/translate.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 146 insertions(+), 5 deletions(-) diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c index 5ea6a0a..6cf57a7 100644 --- a/target-i386/cc_helper.c +++ b/target-i386/cc_helper.c @@ -75,6 +75,24 @@ const uint8_t parity_table[256] = { #endif +static target_ulong compute_all_adcx(target_ulong dst, target_ulong src1, + target_ulong src2) +{ + return (src1 & ~CC_C) | (dst * CC_C); +} + +static target_ulong compute_all_adox(target_ulong dst, target_ulong src1, + target_ulong src2) +{ + return (src1 & ~CC_O) | (src2 * CC_O); +} + +static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1, + target_ulong src2) +{ + return (src1 & ~(CC_C | CC_O)) | (dst * CC_C) | (src2 * CC_O); +} + target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, target_ulong src2, int op) { @@ -162,6 +180,13 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, case CC_OP_BMILGL: return compute_all_bmilgl(dst, src1); + case CC_OP_ADCX: + return compute_all_adcx(dst, src1, src2); + case CC_OP_ADOX: + return compute_all_adox(dst, src1, src2); + case CC_OP_ADCOX: + return compute_all_adcox(dst, src1, src2); + #ifdef TARGET_X86_64 case CC_OP_MULQ: return compute_all_mulq(dst, src1); @@ -210,6 +235,7 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, case CC_OP_SARW: case CC_OP_SARL: case CC_OP_SARQ: + case CC_OP_ADOX: return src1 & 1; case CC_OP_INCB: @@ -228,6 +254,10 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, case CC_OP_MULQ: return src1 != 0; + case CC_OP_ADCX: + case CC_OP_ADCOX: + return dst; + case CC_OP_ADDB: return compute_c_addb(dst, src1); case CC_OP_ADDW: diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 0cb64ab..5582e5f 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -407,11 +407,11 @@ typedef struct x86_def_t { CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A) #define TCG_SVM_FEATURES 0 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP \ - CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2) + CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX) /* missing: CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, - CPUID_7_0_EBX_RDSEED, CPUID_7_0_EBX_ADX */ + CPUID_7_0_EBX_RDSEED */ /* built-in CPU model definitions */ diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 960676b..e0443d8 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -641,6 +641,10 @@ typedef enum { CC_OP_BMILGL, CC_OP_BMILGQ, + CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */ + CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest. */ + CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ + CC_OP_NB, } CCOp; diff --git a/target-i386/helper.c b/target-i386/helper.c index 74d600f..66c3624 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -113,6 +113,10 @@ static const char *cc_op_str[CC_OP_NB] = { "BMILGW", "BMILGL", "BMILGQ", + + "ADCX", + "ADOX", + "ADCOX", }; static void diff --git a/target-i386/translate.c b/target-i386/translate.c index 68e30e6..7edfb55 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -210,6 +210,9 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2, + [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, }; static void set_cc_op(DisasContext *s, CCOp op) @@ -994,6 +997,11 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) t0 = gen_ext_tl(reg, cpu_cc_src, size, false); return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 }; + case CC_OP_ADCX: + case CC_OP_ADCOX: + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst, + .mask = -1, .no_setcond = true }; + case CC_OP_EFLAGS: case CC_OP_SARB ... CC_OP_SARQ: /* CC_SRC & 1 */ @@ -1027,6 +1035,9 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) gen_compute_eflags(s); /* FALLTHRU */ case CC_OP_EFLAGS: + case CC_OP_ADCX: + case CC_OP_ADOX: + case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_S }; default: @@ -1041,9 +1052,17 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) /* compute eflags.O to reg */ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) { - gen_compute_eflags(s); - return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, - .mask = CC_O }; + switch (s->cc_op) { + case CC_OP_ADOX: + case CC_OP_ADCOX: + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2, + .mask = -1, .no_setcond = true }; + + default: + gen_compute_eflags(s); + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_O }; + } } /* compute eflags.Z to reg */ @@ -1054,6 +1073,9 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) gen_compute_eflags(s); /* FALLTHRU */ case CC_OP_EFLAGS: + case CC_OP_ADCX: + case CC_OP_ADOX: + case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_Z }; default: @@ -4174,6 +4196,87 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]); break; + case 0x1f6: /* adcx Gy, Ey */ + case 0x2f6: /* adox Gy, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) { + goto illegal_op; + } else { + TCGv carry_in, carry_out; + int end_op; + + ot = (s->dflag == 2 ? OT_QUAD : OT_LONG); + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + + /* Re-use the carry-out from a previous round. */ + TCGV_UNUSED(carry_in); + carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2); + switch (s->cc_op) { + case CC_OP_ADCX: + if (b == 0x1f6) { + carry_in = cpu_cc_dst; + end_op = CC_OP_ADCX; + } else { + end_op = CC_OP_ADCOX; + } + break; + case CC_OP_ADOX: + if (b == 0x1f6) { + end_op = CC_OP_ADCOX; + } else { + carry_in = cpu_cc_src2; + end_op = CC_OP_ADOX; + } + break; + case CC_OP_ADCOX: + end_op = CC_OP_ADCOX; + carry_in = carry_out; + break; + default: + end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADCOX); + break; + } + /* If we can't reuse carry-out, get it out of EFLAGS. */ + if (TCGV_IS_UNUSED(carry_in)) { + if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { + gen_compute_eflags(s); + } + carry_in = cpu_tmp0; + tcg_gen_shri_tl(carry_in, cpu_cc_src, + ctz32(b == 0x1f6 ? CC_C : CC_O)); + tcg_gen_andi_tl(carry_in, carry_in, 1); + } + + switch (ot) { +#ifdef TARGET_X86_64 + case OT_LONG: + /* If we know TL is 64-bit, and we want a 32-bit + result, just do everything in 64-bit arithmetic. */ + tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]); + tcg_gen_ext32u_i64(cpu_T[0], cpu_T[0]); + tcg_gen_add_i64(cpu_T[0], cpu_T[0], cpu_regs[reg]); + tcg_gen_add_i64(cpu_T[0], cpu_T[0], carry_in); + tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T[0]); + tcg_gen_shri_i64(carry_out, cpu_T[0], 32); + break; +#endif + default: + /* Otherwise compute the carry-out in two steps. */ + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]); + tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4, + cpu_T[0], cpu_regs[reg]); + tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in); + tcg_gen_setcond_tl(TCG_COND_LTU, carry_out, + cpu_T[0], carry_in); + tcg_gen_or_tl(carry_out, carry_out, cpu_tmp4); + break; + } + /* We began with all flags computed to CC_SRC, and we + have now placed the carry-out in CC_DST. All that + is left is to record the CC_OP. */ + set_cc_op(s, end_op); + } + break; + case 0x1f7: /* shlx Gy, Ey, By */ case 0x2f7: /* sarx Gy, Ey, By */ case 0x3f7: /* shrx Gy, Ey, By */