From patchwork Thu Apr 4 22:56:21 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 233987 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id ACCCF2C008D for ; Fri, 5 Apr 2013 10:10:50 +1100 (EST) Received: from localhost ([::1]:33837 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1UNtIu-0000Y7-OT for incoming@patchwork.ozlabs.org; Thu, 04 Apr 2013 19:10:48 -0400 Received: from eggs.gnu.org ([208.118.235.92]:47711) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1UNt6E-0006YR-Er for qemu-devel@nongnu.org; Thu, 04 Apr 2013 18:57:49 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1UNt67-0001RE-JM for qemu-devel@nongnu.org; Thu, 04 Apr 2013 18:57:42 -0400 Received: from mail-oa0-f49.google.com ([209.85.219.49]:44399) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1UNt67-0001R3-DK for qemu-devel@nongnu.org; Thu, 04 Apr 2013 18:57:35 -0400 Received: by mail-oa0-f49.google.com with SMTP id j6so3466661oag.36 for ; Thu, 04 Apr 2013 15:57:34 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=x-received:sender:from:to:cc:subject:date:message-id:x-mailer :in-reply-to:references; bh=wNyG6/XTXemSMI2WSFGMS42q7gAPAnf0xlpwTqGGgwY=; b=P1aDAPdjt7LNy9e4nXvf8OyCrQx92L/h6HoprYUCHi+ARiDNGpRsow/lJlUdmQbVQc KLkPH4A0QPoJopwM0ybyGTOB9j5aSBap2Vc3iO4faTpck0oW0lb0spS9wA+1z7BXiZ5A 86HmVr3/0ZNkNQb9jQl06zTUyJgYLYQL8hMYm35rNCQg2h+Hxp/iTXcUClQijVJiLH1w YX2C1icx5K6mcRdowpDncqRU3jI+rn8iGhduFrOn8FSo2pp9hzlTASPvrWGbRuPEHKEU hrml89k1kkRQt04NaSyIrObtMEr8xKkTHNCNNoBJGuhZ+w746FPzUJ8AKWiTMRaTF4RQ NNyQ== 
X-Received: by 10.60.32.243 with SMTP id m19mr6261336oei.13.1365116254794; Thu, 04 Apr 2013 15:57:34 -0700 (PDT) Received: from pebble.com ([12.236.175.36]) by mx.google.com with ESMTPS id j10sm9227424obg.4.2013.04.04.15.57.33 (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Thu, 04 Apr 2013 15:57:34 -0700 (PDT) From: Richard Henderson To: qemu-devel@nongnu.org Date: Thu, 4 Apr 2013 17:56:21 -0500 Message-Id: <1365116186-19382-29-git-send-email-rth@twiddle.net> X-Mailer: git-send-email 1.8.1.4 In-Reply-To: <1365116186-19382-1-git-send-email-rth@twiddle.net> References: <1365116186-19382-1-git-send-email-rth@twiddle.net> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 209.85.219.49 Cc: av1474@comtv.ru, Aurelien Jarno Subject: [Qemu-devel] [PATCH v4 28/33] tcg-ppc64: Use ISEL for setcond X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org There are a few simple special cases that should be handled first. Break these out to subroutines to avoid code duplication. 
Signed-off-by: Richard Henderson Reviewed-by: Aurelien Jarno --- tcg/ppc64/tcg-target.c | 181 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 119 insertions(+), 62 deletions(-) diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index f0ed698..27a7ff2 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -45,6 +45,7 @@ static uint8_t *tb_ret_addr; #endif #define HAVE_ISA_2_06 0 +#define HAVE_ISEL 0 #ifdef CONFIG_USE_GUEST_BASE #define TCG_GUEST_BASE_REG 30 @@ -390,6 +391,7 @@ static int tcg_target_const_match (tcg_target_long val, #define ORC XO31(412) #define EQV XO31(284) #define NAND XO31(476) +#define ISEL XO31( 15) #define MULLD XO31(233) #define MULHD XO31( 73) @@ -445,6 +447,7 @@ static int tcg_target_const_match (tcg_target_long val, #define BT(n, c) (((c)+((n)*4))<<21) #define BA(n, c) (((c)+((n)*4))<<16) #define BB(n, c) (((c)+((n)*4))<<11) +#define BC_(n, c) (((c)+((n)*4))<<6) #define BO_COND_TRUE BO (12) #define BO_COND_FALSE BO ( 4) @@ -470,6 +473,20 @@ static const uint32_t tcg_to_bc[] = { [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE, }; +/* The low bit here is set if the RA and RB fields must be inverted. 
*/ +static const uint32_t tcg_to_isel[] = { + [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ), + [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1, + [TCG_COND_LT] = ISEL | BC_(7, CR_LT), + [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1, + [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1, + [TCG_COND_GT] = ISEL | BC_(7, CR_GT), + [TCG_COND_LTU] = ISEL | BC_(7, CR_LT), + [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1, + [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1, + [TCG_COND_GTU] = ISEL | BC_(7, CR_GT), +}; + static inline void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { @@ -1131,79 +1148,119 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2, } } -static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond, - TCGArg arg0, TCGArg arg1, TCGArg arg2, - int const_arg2) +static void tcg_out_setcond_eq0(TCGContext *s, TCGType type, + TCGReg dst, TCGReg src) { - int crop, sh, arg; + tcg_out32(s, (type == TCG_TYPE_I64 ? CNTLZD : CNTLZW) | RS(src) | RA(dst)); + tcg_out_shri64(s, dst, dst, type == TCG_TYPE_I64 ? 6 : 5); +} - switch (cond) { - case TCG_COND_EQ: - if (const_arg2) { - if (!arg2) { - arg = arg1; - } - else { - arg = 0; - if ((uint16_t) arg2 == arg2) { - tcg_out32(s, XORI | SAI(arg1, 0, arg2)); - } - else { - tcg_out_movi (s, type, 0, arg2); - tcg_out32 (s, XOR | SAB (arg1, 0, 0)); - } - } - } - else { - arg = 0; - tcg_out32 (s, XOR | SAB (arg1, 0, arg2)); - } +static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src) +{ + /* X != 0 implies X + -1 generates a carry. Extra addition + trickery means: R = ~(X-1) + X + C = (-X) + X + C = C. 
*/ + if (dst != src) { + tcg_out32(s, ADDIC | TAI(dst, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, dst, src)); + } else { + tcg_out32(s, ADDIC | TAI(0, src, -1)); + tcg_out32(s, SUBFE | TAB(dst, 0, src)); + } +} - if (type == TCG_TYPE_I64) { - tcg_out32 (s, CNTLZD | RS (arg) | RA (0)); - tcg_out_rld (s, RLDICL, arg0, 0, 58, 6); - } - else { - tcg_out32 (s, CNTLZW | RS (arg) | RA (0)); - tcg_out_rlw(s, RLWINM, arg0, 0, 27, 5, 31); +static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2, + bool const_arg2) +{ + if (const_arg2) { + if ((uint32_t)arg2 == arg2) { + tcg_out_xori32(s, TCG_REG_R0, arg1, arg2); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2); + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0)); } - break; + } else { + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2)); + } + return TCG_REG_R0; +} - case TCG_COND_NE: - if (const_arg2) { - if (!arg2) { - arg = arg1; - } - else { - arg = 0; - if ((uint16_t) arg2 == arg2) { - tcg_out32(s, XORI | SAI(arg1, 0, arg2)); - } else { - tcg_out_movi (s, type, 0, arg2); - tcg_out32 (s, XOR | SAB (arg1, 0, 0)); - } +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGArg arg0, TCGArg arg1, TCGArg arg2, + int const_arg2) +{ + int crop, sh; + + /* Ignore high bits of a potential constant arg2. */ + if (type == TCG_TYPE_I32) { + arg2 = (uint32_t)arg2; + } + + /* Handle common and trivial cases before handling anything else. */ + if (arg2 == 0) { + switch (cond) { + case TCG_COND_EQ: + tcg_out_setcond_eq0(s, type, arg0, arg1); + return; + case TCG_COND_NE: + if (type == TCG_TYPE_I32) { + tcg_out_ext32u(s, TCG_REG_R0, arg1); + arg1 = TCG_REG_R0; } + tcg_out_setcond_ne0(s, arg0, arg1); + return; + case TCG_COND_GE: + tcg_out32(s, NOR | SAB(arg1, arg0, arg1)); + arg1 = arg0; + /* FALLTHRU */ + case TCG_COND_LT: + /* Extract the sign bit. */ + tcg_out_rld(s, RLDICL, arg0, arg1, + type == TCG_TYPE_I64 ? 
1 : 33, 63); + return; + default: + break; } - else { - arg = 0; - tcg_out32 (s, XOR | SAB (arg1, 0, arg2)); - } + } - /* Make sure and discard the high 32-bits of the input. */ - if (type == TCG_TYPE_I32) { - tcg_out32(s, EXTSW | RA(TCG_REG_R0) | RS(arg)); - arg = TCG_REG_R0; - } + /* If we have ISEL, we can implement everything with 3 or 4 insns. + All other cases below are also at least 3 insns, so speed up the + code generator by not considering them and always using ISEL. */ + if (HAVE_ISEL) { + int isel, tab; + + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type); + + isel = tcg_to_isel[cond]; - if (arg == arg1 && arg1 == arg0) { - tcg_out32(s, ADDIC | TAI(0, arg, -1)); - tcg_out32(s, SUBFE | TAB(arg0, 0, arg)); + tcg_out_movi(s, type, arg0, 1); + if (isel & 1) { + /* arg0 = (bc ? 0 : 1) */ + tab = TAB(arg0, 0, arg0); + isel &= ~1; + } else { + /* arg0 = (bc ? 1 : 0) */ + tcg_out_movi(s, type, TCG_REG_R0, 0); + tab = TAB(arg0, arg0, TCG_REG_R0); } - else { - tcg_out32(s, ADDIC | TAI(arg0, arg, -1)); - tcg_out32(s, SUBFE | TAB(arg0, arg0, arg)); + tcg_out32(s, isel | tab); + return; + } + + switch (cond) { + case TCG_COND_EQ: + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); + tcg_out_setcond_eq0(s, type, arg0, arg1); + return; + + case TCG_COND_NE: + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2); + /* Discard the high bits only once, rather than both inputs. */ + if (type == TCG_TYPE_I32) { + tcg_out_ext32u(s, TCG_REG_R0, arg1); + arg1 = TCG_REG_R0; } - break; + tcg_out_setcond_ne0(s, arg0, arg1); + return; case TCG_COND_GT: case TCG_COND_GTU: