From patchwork Mon Aug 5 18:28:44 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 264741 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 3721D2C007E for ; Tue, 6 Aug 2013 04:33:51 +1000 (EST) Received: from localhost ([::1]:58008 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1V6PbH-0000Ya-3N for incoming@patchwork.ozlabs.org; Mon, 05 Aug 2013 14:33:47 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:45906) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1V6PXV-00040j-DF for qemu-devel@nongnu.org; Mon, 05 Aug 2013 14:29:59 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1V6PXP-0001FE-EG for qemu-devel@nongnu.org; Mon, 05 Aug 2013 14:29:53 -0400 Received: from mail-qc0-x22f.google.com ([2607:f8b0:400d:c01::22f]:61065) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1V6PXP-0001F6-9I for qemu-devel@nongnu.org; Mon, 05 Aug 2013 14:29:47 -0400 Received: by mail-qc0-f175.google.com with SMTP id s11so1868904qcv.20 for ; Mon, 05 Aug 2013 11:29:47 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=sender:from:to:cc:subject:date:message-id:x-mailer:in-reply-to :references; bh=uLJLIknRv5OCoZ4jH6vlGSbhX1LiEuo6MQg4rXudllc=; b=e53ETBZd55vmZwgOMwY7b+8f/FI2C11Te2wt4sedCc5jdWydWVlBHu8gE9Wz28L61T TUIcjVWSORiNn6oL/6VI01VbFRj+ZTFU6MrhrBmE3bNTkfRDncygQGwi+XcHRBuzWGxo KwM+ColC7mxeBZi7K4TEaMMmFtSOEQWrUYR+TlgSAqjMv6UZk/nVLc9w9O8ZGoQteK9L WBgqEl3R0EOr56rJGA36TCg6rd6tru0O4wZC5mmn3CBS3AXvxMuj9D4YDk1tY9YUDnHn 3qKkLzTfs9pI1iUMehUJMsk+34k+6JPXJQt8VbdciEJ1Sti7G1O1bpen21O+UYp0NBT+ mJMA== X-Received: by 10.224.165.11 with SMTP id g11mr30937648qay.8.1375727386925; Mon, 05 Aug 2013 11:29:46 -0700 (PDT) Received: from pebble.com (cpe-66-91-180-52.hawaii.res.rr.com. [66.91.180.52]) by mx.google.com with ESMTPSA id n8sm574544qez.2.2013.08.05.11.29.45 for (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Mon, 05 Aug 2013 11:29:46 -0700 (PDT) From: Richard Henderson To: qemu-devel@nongnu.org Date: Mon, 5 Aug 2013 08:28:44 -1000 Message-Id: <1375727330-30515-10-git-send-email-rth@twiddle.net> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1375727330-30515-1-git-send-email-rth@twiddle.net> References: <1375727330-30515-1-git-send-email-rth@twiddle.net> X-detected-operating-system: by eggs.gnu.org: Error: Malformed IPv6 address (bad octet value). X-Received-From: 2607:f8b0:400d:c01::22f Cc: "Vassili Karpov \(malc\)" , Richard Henderson Subject: [Qemu-devel] [PATCH for-next 09/15] tcg-ppc64: Handle long offsets better X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Previously we'd only handle 16-bit offsets from memory operand without falling back to indexed, but it's easy to use ADDIS to handle full 32-bit offsets. This also lets us unify code that existed inline in tcg_out_op for handling addition of large constants. Signed-off-by: Richard Henderson --- tcg/ppc64/tcg-target.c | 159 +++++++++++++++++++++++++------------------------ 1 file changed, 81 insertions(+), 78 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index a79b876..e9c41fb 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -119,7 +119,6 @@ static const int tcg_target_reg_alloc_order[] = { TCG_REG_R31, TCG_REG_R12, /* call clobbered, non-arguments */ TCG_REG_R11, - TCG_REG_R2, TCG_REG_R10, /* call clobbered, arguments */ TCG_REG_R9, TCG_REG_R8, @@ -742,25 +741,55 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg, #endif } -static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr, - int offset, PowerOpcode op1, PowerOpcode op2) +static void tcg_out_mem_long(TCGContext *s, PowerOpcode opi, PowerOpcode opx, + TCGReg rt, TCGReg base, tcg_target_long offset) { - if (offset == (int16_t) offset) { - tcg_out32(s, op1 | TAI(ret, addr, offset)); - } else { - tcg_out_movi(s, TCG_TYPE_I64, 0, offset); - tcg_out32(s, op2 | TAB(ret, addr, 0)); + tcg_target_long orig = offset, l0, l1, extra = 0, align = 0; + TCGReg rs = TCG_REG_R2; + + assert(rt != TCG_REG_R2 && base != TCG_REG_R2); + + switch (opi) { + case LD: case LWA: + align = 3; + /* FALLTHRU */ + default: + if (rt != TCG_REG_R0) { + rs = rt; + } + break; + case STD: + align = 3; + break; + case STB: case STH: case STW: + break; } -} -static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr, - int offset, PowerOpcode op1, PowerOpcode op2) -{ - if (offset == (int16_t) (offset & ~3)) { - tcg_out32(s, op1 | TAI(ret, addr, offset)); - } else { - tcg_out_movi(s, TCG_TYPE_I64, 0, offset); - tcg_out32(s, op2 | TAB(ret, addr, 0)); + /* For unaligned, or very large offsets, use the indexed form. */ + if (offset & align || offset != (int32_t)offset) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig); + tcg_out32(s, opx | TAB(rt, base, TCG_REG_R2)); + return; + } + + l0 = (int16_t)offset; + offset = (offset - l0) >> 16; + l1 = (int16_t)offset; + + if (l1 < 0 && orig >= 0) { + extra = 0x4000; + l1 = (int16_t)(offset - 0x4000); + } + if (l1) { + tcg_out32(s, ADDIS | TAI(rs, base, l1)); + base = rs; + } + if (extra) { + tcg_out32(s, ADDIS | TAI(rs, base, extra)); + base = rs; + } + if (opi != ADDI || base != rt || l0 != 0) { + tcg_out32(s, opi | TAI(rt, base, l0)); } } @@ -1088,22 +1117,30 @@ static void tcg_target_qemu_prologue (TCGContext *s) tcg_out32(s, BCLR | BO_ALWAYS); } -static void tcg_out_ld (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, - tcg_target_long arg2) +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, + tcg_target_long arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst (s, ret, arg1, arg2, LWZ, LWZX); - else - tcg_out_ldsta (s, ret, arg1, arg2, LD, LDX); + PowerOpcode opi, opx; + + if (type == TCG_TYPE_I32) { + opi = LWZ, opx = LWZX; + } else { + opi = LD, opx = LDX; + } + tcg_out_mem_long(s, opi, opx, ret, arg1, arg2); } -static void tcg_out_st (TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, - tcg_target_long arg2) +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1, + tcg_target_long arg2) { - if (type == TCG_TYPE_I32) - tcg_out_ldst (s, arg, arg1, arg2, STW, STWX); - else - tcg_out_ldsta (s, arg, arg1, arg2, STD, STDX); + PowerOpcode opi, opx; + + if (type == TCG_TYPE_I32) { + opi = STW, opx = STWX; + } else { + opi = STD, opx = STDX; + } + tcg_out_mem_long(s, opi, opx, arg, arg1, arg2); } static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, @@ -1464,61 +1501,52 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, break; case INDEX_op_ld8u_i32: case INDEX_op_ld8u_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX); + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); break; case INDEX_op_ld8s_i32: case INDEX_op_ld8s_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LBZ, LBZX); - tcg_out32 (s, EXTSB | RS (args[0]) | RA (args[0])); + tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]); + tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0])); break; case INDEX_op_ld16u_i32: case INDEX_op_ld16u_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LHZ, LHZX); + tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]); break; case INDEX_op_ld16s_i32: case INDEX_op_ld16s_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LHA, LHAX); + tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]); break; case INDEX_op_ld_i32: case INDEX_op_ld32u_i64: - tcg_out_ldst (s, args[0], args[1], args[2], LWZ, LWZX); + tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]); break; case INDEX_op_ld32s_i64: - tcg_out_ldsta (s, args[0], args[1], args[2], LWA, LWAX); + tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]); break; case INDEX_op_ld_i64: - tcg_out_ldsta (s, args[0], args[1], args[2], LD, LDX); + tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]); break; case INDEX_op_st8_i32: case INDEX_op_st8_i64: - tcg_out_ldst (s, args[0], args[1], args[2], STB, STBX); + tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]); break; case INDEX_op_st16_i32: case INDEX_op_st16_i64: - tcg_out_ldst (s, args[0], args[1], args[2], STH, STHX); + tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]); break; case INDEX_op_st_i32: case INDEX_op_st32_i64: - tcg_out_ldst (s, args[0], args[1], args[2], STW, STWX); + tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]); break; case INDEX_op_st_i64: - tcg_out_ldsta (s, args[0], args[1], args[2], STD, STDX); + tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]); break; case INDEX_op_add_i32: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - int32_t l, h; do_addi_32: - l = (int16_t)a2; - h = a2 - l; - if (h) { - tcg_out32(s, ADDIS | TAI(a0, a1, h >> 16)); - a1 = a0; - } - if (l || a0 != a1) { - tcg_out32(s, ADDI | TAI(a0, a1, l)); - } + tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2); } else { tcg_out32(s, ADD | TAB(a0, a1, a2)); } @@ -1694,32 +1722,8 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_add_i64: a0 = args[0], a1 = args[1], a2 = args[2]; if (const_args[2]) { - int32_t l0, h1, h2; do_addi_64: - /* We can always split any 32-bit signed constant into 3 pieces. - Note the positive 0x80000000 coming from the sub_i64 path, - handled with the same code we need for eg 0x7fff8000. */ - assert(a2 == (int32_t)a2 || a2 == 0x80000000); - l0 = (int16_t)a2; - h1 = a2 - l0; - h2 = 0; - if (h1 < 0 && (int64_t)a2 > 0) { - h2 = 0x40000000; - h1 = a2 - h2 - l0; - } - assert((TCGArg)h2 + h1 + l0 == a2); - - if (h2) { - tcg_out32(s, ADDIS | TAI(a0, a1, h2 >> 16)); - a1 = a0; - } - if (h1) { - tcg_out32(s, ADDIS | TAI(a0, a1, h1 >> 16)); - a1 = a0; - } - if (l0 || a0 != a1) { - tcg_out32(s, ADDI | TAI(a0, a1, l0)); - } + tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2); } else { tcg_out32(s, ADD | TAB(a0, a1, a2)); } @@ -2175,10 +2179,9 @@ static void tcg_target_init (TCGContext *s) tcg_regset_clear (s->reserved_regs); tcg_regset_set_reg (s->reserved_regs, TCG_REG_R0); /* tcg temp */ tcg_regset_set_reg (s->reserved_regs, TCG_REG_R1); /* stack pointer */ + tcg_regset_set_reg (s->reserved_regs, TCG_REG_R2); /* mem temp */ #ifdef __APPLE__ tcg_regset_set_reg (s->reserved_regs, TCG_REG_R11); /* ??? */ -#else - tcg_regset_set_reg (s->reserved_regs, TCG_REG_R2); /* toc */ #endif tcg_regset_set_reg (s->reserved_regs, TCG_REG_R13); /* thread pointer */