From patchwork Wed Dec 4 12:56:29 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Sandiford X-Patchwork-Id: 296508 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 886D42C00B6 for ; Wed, 4 Dec 2013 23:56:54 +1100 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:references:date:in-reply-to:message-id :mime-version:content-type; q=dns; s=default; b=QlHufpF2cTUI/slo bpiRC5b6NpmoZZF/dMlEgJja8GXx0zq6/mXSmM94E/WS2Xm4AAdXcSCD9/w2yQGD DUnO9g8qMAYbf0QB0YP1rBElZRD7mUCDOhz/LKPC9VaWImdYTIHKSz4qn02avGb2 ysTV0XR1Ba1K1w5EaS+LH1bWOvY= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:references:date:in-reply-to:message-id :mime-version:content-type; s=default; bh=8HluQPVO6v8yvnwHzTCMq2 +nJx4=; b=jgeXvNzGdgnWeTGfB97T9ng+WCzWwEpIgvCOGHHFEF/FNII6x9m6PR fq4vm7/lerhUBaPT0Hfp5u0AFE99gdkPBQSAg3TPZvA+TtmfR2VUF1UM6Q+BKvXC xBIc0XQ9HWo8Vymt1U0a3dspuTVcoaAelm/+l39ifoHk5j2bvsBJE= Received: (qmail 22112 invoked by alias); 4 Dec 2013 12:56:45 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 22102 invoked by uid 89); 4 Dec 2013 12:56:44 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=0.7 required=5.0 tests=AWL, BAYES_50, FREEMAIL_FROM, RDNS_NONE, SPF_PASS, URIBL_BLOCKED autolearn=no version=3.3.2 X-HELO: mail-wi0-f180.google.com Received: from Unknown (HELO mail-wi0-f180.google.com) (209.85.212.180) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Wed, 04 Dec 2013 12:56:43 +0000 Received: by mail-wi0-f180.google.com with SMTP id hn9so3720649wib.7 for ; Wed, 04 Dec 2013 04:56:34 -0800 (PST) X-Received: by 10.194.88.138 with SMTP id bg10mr11871792wjb.56.1386161793991; Wed, 04 Dec 2013 04:56:33 -0800 (PST) Received: from localhost ([2.28.235.199]) by mx.google.com with ESMTPSA id g16sm6752326wiw.6.2013.12.04.04.56.32 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Wed, 04 Dec 2013 04:56:33 -0800 (PST) From: Richard Sandiford To: gcc-patches@gcc.gnu.org Mail-Followup-To: gcc-patches@gcc.gnu.org, zadeck@naturalbridge.com, mikestump@comcast.net, rdsandiford@googlemail.com Cc: zadeck@naturalbridge.com, mikestump@comcast.net Subject: Re: [wide-int] Add fast path for hosts with HWI widening multiplication References: <8761r8kgph.fsf@talisman.default> Date: Wed, 04 Dec 2013 12:56:29 +0000 In-Reply-To: <8761r8kgph.fsf@talisman.default> (Richard Sandiford's message of "Sun, 01 Dec 2013 10:28:58 +0000") Message-ID: <871u1sdbb6.fsf@talisman.default> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/24.3 (gnu/linux) MIME-Version: 1.0 Richard Sandiford writes: > This patch handles multiplications using a single HWIxHWI->2HWI multiplication > on hosts that have one. This removes all uses of the slow (half-HWI) path > for insn-recog.ii. The slow path is still used 58 times for cp/parser.ii > and 168 times for fold-const.ii, but at that kind of level it shouldn't > matter much. > > I followed Joseph's suggestion and reused longlong.h. I copied it from > libgcc rather than glibc since it seemed better for GCC to have a single > version across both gcc/ and libgcc/. I can put it in include/ if that > seems better. I've committed the patch to move longlong.h to trunk and merged back to the branch, so all that's left is the wide-int.cc patch. OK to install? Thanks, Richard Index: gcc/wide-int.cc =================================================================== --- gcc/wide-int.cc 2013-12-03 23:59:08.133658567 +0000 +++ gcc/wide-int.cc 2013-12-04 12:55:28.466895358 +0000 @@ -27,6 +27,16 @@ along with GCC; see the file COPYING3. #include "tree.h" #include "dumpfile.h" +#if GCC_VERSION >= 3000 +#define W_TYPE_SIZE HOST_BITS_PER_WIDE_INT +typedef unsigned HOST_HALF_WIDE_INT UHWtype; +typedef unsigned HOST_WIDE_INT UWtype; +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); +#include "longlong.h" +#endif + /* This is the maximal size of the buffer needed for dump. */ const unsigned int MAX_SIZE = (4 * (MAX_BITSIZE_MODE_ANY_INT / 4 + (MAX_BITSIZE_MODE_ANY_INT @@ -1255,8 +1265,8 @@ wi_pack (unsigned HOST_WIDE_INT *result, record in *OVERFLOW whether the result overflowed. SGN controls the signedness and is used to check overflow or if HIGH is set. */ unsigned int -wi::mul_internal (HOST_WIDE_INT *val, const HOST_WIDE_INT *op1, - unsigned int op1len, const HOST_WIDE_INT *op2, +wi::mul_internal (HOST_WIDE_INT *val, const HOST_WIDE_INT *op1val, + unsigned int op1len, const HOST_WIDE_INT *op2val, unsigned int op2len, unsigned int prec, signop sgn, bool *overflow, bool high) { @@ -1285,24 +1295,53 @@ wi::mul_internal (HOST_WIDE_INT *val, co if (needs_overflow) *overflow = false; + wide_int_ref op1 = wi::storage_ref (op1val, op1len, prec); + wide_int_ref op2 = wi::storage_ref (op2val, op2len, prec); + /* This is a surprisingly common case, so do it first. */ - if ((op1len == 1 && op1[0] == 0) || (op2len == 1 && op2[0] == 0)) + if (op1 == 0 || op2 == 0) { val[0] = 0; return 1; } +#ifdef umul_ppmm + if (sgn == UNSIGNED) + { + /* If the inputs are single HWIs and the output has room for at + least two HWIs, we can use umul_ppmm directly. */ + if (prec >= HOST_BITS_PER_WIDE_INT * 2 + && wi::fits_uhwi_p (op1) + && wi::fits_uhwi_p (op2)) + { + umul_ppmm (val[1], val[0], op1.ulow (), op2.ulow ()); + return 1 + (val[1] != 0 || val[0] < 0); + } + /* Likewise if the output is a full single HWI, except that the + upper HWI of the result is only used for determining overflow. + (We handle this case inline when overflow isn't needed.) */ + else if (prec == HOST_BITS_PER_WIDE_INT) + { + unsigned HOST_WIDE_INT upper; + umul_ppmm (upper, val[0], op1.ulow (), op2.ulow ()); + if (needs_overflow) + *overflow = (upper != 0); + return 1; + } + } +#endif + /* Handle multiplications by 1. */ - if (op1len == 1 && op1[0] == 1) + if (op1 == 1) { for (i = 0; i < op2len; i++) - val[i] = op2[i]; + val[i] = op2val[i]; return op2len; } - if (op2len == 1 && op2[0] == 1) + if (op2 == 1) { for (i = 0; i < op1len; i++) - val[i] = op1[i]; + val[i] = op1val[i]; return op1len; } @@ -1316,13 +1355,13 @@ wi::mul_internal (HOST_WIDE_INT *val, co if (sgn == SIGNED) { - o0 = sext_hwi (op1[0], prec); - o1 = sext_hwi (op2[0], prec); + o0 = op1.to_shwi (); + o1 = op2.to_shwi (); } else { - o0 = zext_hwi (op1[0], prec); - o1 = zext_hwi (op2[0], prec); + o0 = op1.to_uhwi (); + o1 = op2.to_uhwi (); } r = o0 * o1; @@ -1344,9 +1383,9 @@ wi::mul_internal (HOST_WIDE_INT *val, co } /* We do unsigned mul and then correct it. */ - wi_unpack (u, (const unsigned HOST_WIDE_INT*)op1, op1len, + wi_unpack (u, (const unsigned HOST_WIDE_INT *) op1val, op1len, half_blocks_needed, prec, SIGNED); - wi_unpack (v, (const unsigned HOST_WIDE_INT*)op2, op2len, + wi_unpack (v, (const unsigned HOST_WIDE_INT *) op2val, op2len, half_blocks_needed, prec, SIGNED); /* The 2 is for a full mult. */ @@ -1371,7 +1410,7 @@ wi::mul_internal (HOST_WIDE_INT *val, co if (sgn == SIGNED && (high || needs_overflow)) { unsigned HOST_WIDE_INT b; - if (op1[op1len-1] < 0) + if (wi::neg_p (op1)) { b = 0; for (i = 0; i < half_blocks_needed; i++) @@ -1382,7 +1421,7 @@ wi::mul_internal (HOST_WIDE_INT *val, co b = t >> (HOST_BITS_PER_WIDE_INT - 1); } } - if (op2[op2len-1] < 0) + if (wi::neg_p (op2)) { b = 0; for (i = 0; i < half_blocks_needed; i++)