From patchwork Mon Aug 5 18:28:50 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 264744 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 882952C0077 for ; Tue, 6 Aug 2013 04:39:02 +1000 (EST) Received: from localhost ([::1]:40781 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1V6PgH-0006kh-PN for incoming@patchwork.ozlabs.org; Mon, 05 Aug 2013 14:38:57 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46052) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1V6PXp-0004bL-AW for qemu-devel@nongnu.org; Mon, 05 Aug 2013 14:30:19 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1V6PXi-0001YV-MR for qemu-devel@nongnu.org; Mon, 05 Aug 2013 14:30:13 -0400 Received: from mail-qc0-x236.google.com ([2607:f8b0:400d:c01::236]:41529) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1V6PXi-0001YD-I9 for qemu-devel@nongnu.org; Mon, 05 Aug 2013 14:30:06 -0400 Received: by mail-qc0-f182.google.com with SMTP id c11so1905970qcv.27 for ; Mon, 05 Aug 2013 11:30:06 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=sender:from:to:cc:subject:date:message-id:x-mailer:in-reply-to :references; bh=9dc+hFvuXnHeRkOjta7vabuvIhVaPmJKjCy/YxvtULc=; b=Vs+Hy0qXabR3/PVZnRruREuNXNiy8VWYxP+UYVNC1ZcrU0y19DAUxtaMpDcEFOPSa8 ySxFsDAwvSY4WC+OHfeggMG++rb0FeFvwibKMakOgOHHP/spWjggOFIp4EGEerwvrGxa UjIJK6Cib9DeBLQM9AKvcOUgXPji7Q7GiG+8S0uoBKFNg7xtovjZNNwSKBK9qlAGlLWN TFIgM/bl/xkg7i4vJaj2p+xO0zJvOem3spZqIjCyNZD6e6DfXJriGobrkVihjm1OY/jL lE8eAehRfy1VodDovsk5ZZIDcSlChwVsrEDexnPepuE/AmCMbrqYKNQXW7sRGLfCg8p0 biJQ== X-Received: by 10.224.22.195 with SMTP id o3mr29929691qab.90.1375727406211; Mon, 05 Aug 2013 11:30:06 -0700 (PDT) Received: from pebble.com (cpe-66-91-180-52.hawaii.res.rr.com. [66.91.180.52]) by mx.google.com with ESMTPSA id n8sm574544qez.2.2013.08.05.11.30.04 for (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Mon, 05 Aug 2013 11:30:05 -0700 (PDT) From: Richard Henderson To: qemu-devel@nongnu.org Date: Mon, 5 Aug 2013 08:28:50 -1000 Message-Id: <1375727330-30515-16-git-send-email-rth@twiddle.net> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1375727330-30515-1-git-send-email-rth@twiddle.net> References: <1375727330-30515-1-git-send-email-rth@twiddle.net> X-detected-operating-system: by eggs.gnu.org: Error: Malformed IPv6 address (bad octet value). X-Received-From: 2607:f8b0:400d:c01::236 Cc: "Vassili Karpov \(malc\)" , Richard Henderson Subject: [Qemu-devel] [PATCH for-next 15/15] tcg-ppc64: Implement CONFIG_QEMU_LDST_OPTIMIZATION X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Signed-off-by: Richard Henderson --- configure | 2 +- include/exec/exec-all.h | 4 +- tcg/ppc64/tcg-target.c | 219 +++++++++++++++++++++++++++++++----------------- 3 files changed, 146 insertions(+), 79 deletions(-) diff --git a/configure b/configure index 18fa608..5b9a66c 100755 --- a/configure +++ b/configure @@ -3650,7 +3650,7 @@ echo "libs_softmmu=$libs_softmmu" >> $config_host_mak echo "ARCH=$ARCH" >> $config_host_mak case "$cpu" in - arm|i386|x86_64|ppc|aarch64) + aarch64 | arm | i386 | x86_64 | ppc*) # The TCG interpreter currently does not support ld/st optimization. if test "$tcg_interpreter" = "no" ; then echo "CONFIG_QEMU_LDST_OPTIMIZATION=y" >> $config_host_mak diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 26c3553..91b189b 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -326,11 +326,11 @@ extern uintptr_t tci_tb_ptr; (5) post-process (e.g. stack adjust) (6) jump to corresponding code of the next of fast path */ -# if defined(__i386__) || defined(__x86_64__) +# if defined(__i386__) || defined(__x86_64__) || defined(_ARCH_PPC64) # define GETRA() ((uintptr_t)__builtin_return_address(0)) /* The return address argument for ldst is passed directly. */ # define GETPC_LDST() (abort(), 0) -# elif defined (_ARCH_PPC) && !defined (_ARCH_PPC64) +# elif defined(_ARCH_PPC) # define GETRA() ((uintptr_t)__builtin_return_address(0)) # define GETPC_LDST() ((uintptr_t) ((*(int32_t *)(GETRA() - 4)) - 1)) # elif defined(__arm__) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 4b23597..7ecc032 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -830,26 +830,50 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, #endif } +static const PowerOpcode qemu_ldx_opc[8] = { +#ifdef TARGET_WORDS_BIGENDIAN + LBZX, LHZX, LWZX, LDX, + 0, LHAX, LWAX, LDX +#else + LBZX, LHBRX, LWBRX, LDBRX, + 0, 0, 0, LDBRX, +#endif +}; + +static const PowerOpcode qemu_stx_opc[4] = { +#ifdef TARGET_WORDS_BIGENDIAN + STBX, STHX, STWX, STDX +#else + STBX, STHBRX, STWBRX, STDBRX, +#endif +}; + +static const PowerOpcode qemu_exts_opc[4] = { + EXTSB, EXTSH, EXTSW, 0 +}; + #if defined (CONFIG_SOFTMMU) #include "exec/softmmu_defs.h" /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, - int mmu_idx) */ + * int mmu_idx, uintptr_t ra) + */ static const void * const qemu_ld_helpers[4] = { - helper_ldb_mmu, - helper_ldw_mmu, - helper_ldl_mmu, - helper_ldq_mmu, + helper_ret_ldb_mmu, + helper_ret_ldw_mmu, + helper_ret_ldl_mmu, + helper_ret_ldq_mmu, }; /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr, - uintxx_t val, int mmu_idx) */ + * uintxx_t val, int mmu_idx, uintptr_t ra) + */ static const void * const qemu_st_helpers[4] = { - helper_stb_mmu, - helper_stw_mmu, - helper_stl_mmu, - helper_stq_mmu, + helper_ret_stb_mmu, + helper_ret_stw_mmu, + helper_ret_stl_mmu, + helper_ret_stq_mmu, }; /* Perform the TLB load and compare. Places the result of the comparison @@ -911,29 +935,108 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, int s_bits, TCGReg addr_reg, return addr_reg; } -#endif -static const PowerOpcode qemu_ldx_opc[8] = { -#ifdef TARGET_WORDS_BIGENDIAN - LBZX, LHZX, LWZX, LDX, - 0, LHAX, LWAX, LDX -#else - LBZX, LHBRX, LWBRX, LDBRX, - 0, 0, 0, LDBRX, -#endif -}; +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. */ +static void add_qemu_ldst_label(TCGContext *s, bool is_ld, int opc, + int data_reg, int addr_reg, int mem_index, + uint8_t *raddr, uint8_t *label_ptr) +{ + int idx; + TCGLabelQemuLdst *label; -static const PowerOpcode qemu_stx_opc[4] = { -#ifdef TARGET_WORDS_BIGENDIAN - STBX, STHX, STWX, STDX -#else - STBX, STHBRX, STWBRX, STDBRX, -#endif -}; + if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) { + tcg_abort(); + } -static const PowerOpcode qemu_exts_opc[4] = { - EXTSB, EXTSH, EXTSW, 0 -}; + idx = s->nb_qemu_ldst_labels++; + label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx]; + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = data_reg; + label->addrlo_reg = addr_reg; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +/* See the GETPC definition in include/exec/exec-all.h. */ +static inline uintptr_t do_getpc(uint8_t *raddr) +{ + return (uintptr_t)raddr - 1; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + int opc = lb->opc; + int s_bits = opc & 3; + PowerOpcode insn; + + reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0); + + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. */ + if (lb->addrlo_reg != TCG_REG_R4) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg); + } + + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, do_getpc(lb->raddr)); + + tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[s_bits], 1, LK); + + if (opc & 4) { + insn = qemu_exts_opc[s_bits]; + tcg_out32(s, insn | RA(lb->datalo_reg) | RS(TCG_REG_R3)); + } else { + tcg_out_mov(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_R3); + } + + tcg_out_b(s, 0, (uintptr_t)lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + int opc = lb->opc; + + reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); + + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, TCG_AREG0); + + /* If the address needed to be zero-extended, we'll have already + placed it in R4. The only remaining case is 64-bit guest. */ + if (lb->addrlo_reg != TCG_REG_R4) { + tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg); + } + + tcg_out_rld(s, RLDICL, TCG_REG_R5, lb->datalo_reg, + 0, 64 - (1 << (3 + opc))); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb->mem_index); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R7, do_getpc(lb->raddr)); + + tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1, LK); + + tcg_out_b(s, 0, (uintptr_t)lb->raddr); +} + +void tcg_out_tb_finalize(TCGContext *s) +{ + int i, n = s->nb_qemu_ldst_labels; + + /* qemu_ld/st slow paths */ + for (i = 0; i < n; i++) { + TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i]; + if (label->is_ld) { + tcg_out_qemu_ld_slow_path(s, label); + } else { + tcg_out_qemu_st_slow_path(s, label); + } + } +} +#endif /* SOFTMMU */ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) { @@ -941,9 +1044,8 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) PowerOpcode insn; int s_bits; #ifdef CONFIG_SOFTMMU - TCGReg ir; int mem_index; - void *label1_ptr, *label2_ptr; + void *label_ptr; #endif data_reg = *args++; @@ -955,29 +1057,8 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) r0 = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true); - label1_ptr = s->code_ptr; - tcg_out32(s, BC | BI (7, CR_EQ) | BO_COND_TRUE); - - /* slow path */ - ir = TCG_REG_R3; - tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg); - tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index); - - tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[s_bits], 1, LK); - - if (opc & 4) { - insn = qemu_exts_opc[s_bits]; - tcg_out32(s, insn | RA(data_reg) | RS(3)); - } else if (data_reg != 3) { - tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3); - } - - label2_ptr = s->code_ptr; - tcg_out32(s, B); - - /* label1: fast path */ - reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr); + label_ptr = s->code_ptr; + tcg_out32(s, BC | BI (7, CR_EQ) | BO_COND_FALSE); rbase = TCG_REG_R3; r1 = TCG_REG_R0; @@ -1007,7 +1088,8 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc) } #ifdef CONFIG_SOFTMMU - reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr); + add_qemu_ldst_label(s, true, opc, data_reg, r0, mem_index, + s->code_ptr, label_ptr); #endif } @@ -1016,9 +1098,8 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) TCGReg addr_reg, r0, r1, rbase, data_reg; PowerOpcode insn; #ifdef CONFIG_SOFTMMU - TCGReg ir; int mem_index; - void *label1_ptr, *label2_ptr; + void *label_ptr; #endif data_reg = *args++; @@ -1029,23 +1110,8 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) r0 = tcg_out_tlb_read(s, opc, addr_reg, mem_index, false); - label1_ptr = s->code_ptr; - tcg_out32(s, BC | BI (7, CR_EQ) | BO_COND_TRUE); - - /* slow path */ - ir = TCG_REG_R3; - tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0); - tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg); - tcg_out_rld(s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc))); - tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index); - - tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1, LK); - - label2_ptr = s->code_ptr; - tcg_out32(s, B); - - /* label1: fast path */ - reloc_pc14(label1_ptr, (tcg_target_long) s->code_ptr); + label_ptr = s->code_ptr; + tcg_out32(s, BC | BI (7, CR_EQ) | BO_COND_FALSE); rbase = TCG_REG_R3; r1 = TCG_REG_R2; @@ -1070,7 +1136,8 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc) } #ifdef CONFIG_SOFTMMU - reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr); + add_qemu_ldst_label(s, false, opc, data_reg, r0, mem_index, + s->code_ptr, label_ptr); #endif }