From patchwork Wed Dec  4 12:56:29 2013
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Richard Sandiford <rdsandiford@googlemail.com>
X-Patchwork-Id: 296508
Return-Path: 
 <gcc-patches-return-357107-incoming=patchwork.ozlabs.org@gcc.gnu.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Received: from sourceware.org (server1.sourceware.org [209.132.180.131])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256
	bits)) (Client did not present a certificate)
	by ozlabs.org (Postfix) with ESMTPS id 886D42C00B6
	for <incoming@patchwork.ozlabs.org>;
	Wed,  4 Dec 2013 23:56:54 +1100 (EST)
DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id
	:list-unsubscribe:list-archive:list-post:list-help:sender:from
	:to:cc:subject:references:date:in-reply-to:message-id
	:mime-version:content-type; q=dns; s=default; b=QlHufpF2cTUI/slo
	bpiRC5b6NpmoZZF/dMlEgJja8GXx0zq6/mXSmM94E/WS2Xm4AAdXcSCD9/w2yQGD
	DUnO9g8qMAYbf0QB0YP1rBElZRD7mUCDOhz/LKPC9VaWImdYTIHKSz4qn02avGb2
	ysTV0XR1Ba1K1w5EaS+LH1bWOvY=
DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id
	:list-unsubscribe:list-archive:list-post:list-help:sender:from
	:to:cc:subject:references:date:in-reply-to:message-id
	:mime-version:content-type; s=default; bh=8HluQPVO6v8yvnwHzTCMq2
	+nJx4=; b=jgeXvNzGdgnWeTGfB97T9ng+WCzWwEpIgvCOGHHFEF/FNII6x9m6PR
	fq4vm7/lerhUBaPT0Hfp5u0AFE99gdkPBQSAg3TPZvA+TtmfR2VUF1UM6Q+BKvXC
	xBIc0XQ9HWo8Vymt1U0a3dspuTVcoaAelm/+l39ifoHk5j2bvsBJE=
Received: (qmail 22112 invoked by alias); 4 Dec 2013 12:56:45 -0000
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Unsubscribe: <mailto:gcc-patches-unsubscribe-##L=##H@gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Delivered-To: mailing list gcc-patches@gcc.gnu.org
Received: (qmail 22102 invoked by uid 89); 4 Dec 2013 12:56:44 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=0.7 required=5.0 tests=AWL, BAYES_50,
	FREEMAIL_FROM, RDNS_NONE, SPF_PASS,
	URIBL_BLOCKED autolearn=no version=3.3.2
X-HELO: mail-wi0-f180.google.com
Received: from Unknown (HELO mail-wi0-f180.google.com) (209.85.212.180) by
	sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with
	(AES128-SHA encrypted) ESMTPS; Wed, 04 Dec 2013 12:56:43 +0000
Received: by mail-wi0-f180.google.com with SMTP id hn9so3720649wib.7 for
	<gcc-patches@gcc.gnu.org>; Wed, 04 Dec 2013 04:56:34 -0800 (PST)
X-Received: by 10.194.88.138 with SMTP id bg10mr11871792wjb.56.1386161793991;
	Wed, 04 Dec 2013 04:56:33 -0800 (PST)
Received: from localhost ([2.28.235.199]) by mx.google.com with ESMTPSA id
	g16sm6752326wiw.6.2013.12.04.04.56.32 for <multiple
	recipients> (version=TLSv1.2
	cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);
	Wed, 04 Dec 2013 04:56:33 -0800 (PST)
From: Richard Sandiford <rdsandiford@googlemail.com>
To: gcc-patches@gcc.gnu.org
Mail-Followup-To: gcc-patches@gcc.gnu.org, zadeck@naturalbridge.com,
	mikestump@comcast.net, rdsandiford@googlemail.com
Cc: zadeck@naturalbridge.com,  mikestump@comcast.net
Subject: Re: [wide-int] Add fast path for hosts with HWI widening
	multiplication
References: <8761r8kgph.fsf@talisman.default>
Date: Wed, 04 Dec 2013 12:56:29 +0000
In-Reply-To: <8761r8kgph.fsf@talisman.default> (Richard Sandiford's message
	of	"Sun, 01 Dec 2013 10:28:58 +0000")
Message-ID: <871u1sdbb6.fsf@talisman.default>
User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/24.3 (gnu/linux)
MIME-Version: 1.0

Richard Sandiford <rdsandiford@googlemail.com> writes:
> This patch handles multiplications using a single HWIxHWI->2HWI multiplication
> on hosts that have one.  This removes all uses of the slow (half-HWI) path
> for insn-recog.ii.  The slow path is still used 58 times for cp/parser.ii
> and 168 times for fold-const.ii, but at that kind of level it shouldn't
> matter much.
>
> I followed Joseph's suggestion and reused longlong.h.  I copied it from
> libgcc rather than glibc since it seemed better for GCC to have a single
> version across both gcc/ and libgcc/.  I can put it in include/ if that
> seems better.

I've committed the patch to move longlong.h to trunk and merged back
to the branch, so all that's left is the wide-int.cc patch.  OK to install?

Thanks,
Richard

Index: gcc/wide-int.cc
===================================================================
--- gcc/wide-int.cc	2013-12-03 23:59:08.133658567 +0000
+++ gcc/wide-int.cc	2013-12-04 12:55:28.466895358 +0000
@@ -27,6 +27,16 @@ along with GCC; see the file COPYING3.
 #include "tree.h"
 #include "dumpfile.h"
 
+#if GCC_VERSION >= 3000
+#define W_TYPE_SIZE HOST_BITS_PER_WIDE_INT
+typedef unsigned HOST_HALF_WIDE_INT UHWtype;
+typedef unsigned HOST_WIDE_INT UWtype;
+typedef unsigned int UQItype __attribute__ ((mode (QI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef unsigned int UDItype __attribute__ ((mode (DI)));
+#include "longlong.h"
+#endif
+
 /* This is the maximal size of the buffer needed for dump.  */
 const unsigned int MAX_SIZE = (4 * (MAX_BITSIZE_MODE_ANY_INT / 4
 				    + (MAX_BITSIZE_MODE_ANY_INT
@@ -1255,8 +1265,8 @@ wi_pack (unsigned HOST_WIDE_INT *result,
    record in *OVERFLOW whether the result overflowed.  SGN controls
    the signedness and is used to check overflow or if HIGH is set.  */
 unsigned int
-wi::mul_internal (HOST_WIDE_INT *val, const HOST_WIDE_INT *op1,
-		  unsigned int op1len, const HOST_WIDE_INT *op2,
+wi::mul_internal (HOST_WIDE_INT *val, const HOST_WIDE_INT *op1val,
+		  unsigned int op1len, const HOST_WIDE_INT *op2val,
 		  unsigned int op2len, unsigned int prec, signop sgn,
 		  bool *overflow, bool high)
 {
@@ -1285,24 +1295,53 @@ wi::mul_internal (HOST_WIDE_INT *val, co
   if (needs_overflow)
     *overflow = false;
 
+  wide_int_ref op1 = wi::storage_ref (op1val, op1len, prec);
+  wide_int_ref op2 = wi::storage_ref (op2val, op2len, prec);
+
   /* This is a surprisingly common case, so do it first.  */
-  if ((op1len == 1 && op1[0] == 0) || (op2len == 1 && op2[0] == 0))
+  if (op1 == 0 || op2 == 0)
     {
       val[0] = 0;
       return 1;
     }
 
+#ifdef umul_ppmm
+  if (sgn == UNSIGNED)
+    {
+      /* If the inputs are single HWIs and the output has room for at
+	 least two HWIs, we can use umul_ppmm directly.  */
+      if (prec >= HOST_BITS_PER_WIDE_INT * 2
+	  && wi::fits_uhwi_p (op1)
+	  && wi::fits_uhwi_p (op2))
+	{
+	  umul_ppmm (val[1], val[0], op1.ulow (), op2.ulow ());
+	  return 1 + (val[1] != 0 || val[0] < 0);
+	}
+      /* Likewise if the output is a full single HWI, except that the
+	 upper HWI of the result is only used for determining overflow.
+	 (We handle this case inline when overflow isn't needed.)  */
+      else if (prec == HOST_BITS_PER_WIDE_INT)
+	{
+	  unsigned HOST_WIDE_INT upper;
+	  umul_ppmm (upper, val[0], op1.ulow (), op2.ulow ());
+	  if (needs_overflow)
+	    *overflow = (upper != 0);
+	  return 1;
+	}
+    }
+#endif
+
   /* Handle multiplications by 1.  */
-  if (op1len == 1 && op1[0] == 1)
+  if (op1 == 1)
     {
       for (i = 0; i < op2len; i++)
-	val[i] = op2[i];
+	val[i] = op2val[i];
       return op2len;
     }
-  if (op2len == 1 && op2[0] == 1)
+  if (op2 == 1)
     {
       for (i = 0; i < op1len; i++)
-	val[i] = op1[i];
+	val[i] = op1val[i];
       return op1len;
     }
 
@@ -1316,13 +1355,13 @@ wi::mul_internal (HOST_WIDE_INT *val, co
 
       if (sgn == SIGNED)
 	{
-	  o0 = sext_hwi (op1[0], prec);
-	  o1 = sext_hwi (op2[0], prec);
+	  o0 = op1.to_shwi ();
+	  o1 = op2.to_shwi ();
 	}
       else
 	{
-	  o0 = zext_hwi (op1[0], prec);
-	  o1 = zext_hwi (op2[0], prec);
+	  o0 = op1.to_uhwi ();
+	  o1 = op2.to_uhwi ();
 	}
 
       r = o0 * o1;
@@ -1344,9 +1383,9 @@ wi::mul_internal (HOST_WIDE_INT *val, co
     }
 
   /* We do unsigned mul and then correct it.  */
-  wi_unpack (u, (const unsigned HOST_WIDE_INT*)op1, op1len,
+  wi_unpack (u, (const unsigned HOST_WIDE_INT *) op1val, op1len,
 	     half_blocks_needed, prec, SIGNED);
-  wi_unpack (v, (const unsigned HOST_WIDE_INT*)op2, op2len,
+  wi_unpack (v, (const unsigned HOST_WIDE_INT *) op2val, op2len,
 	     half_blocks_needed, prec, SIGNED);
 
   /* The 2 is for a full mult.  */
@@ -1371,7 +1410,7 @@ wi::mul_internal (HOST_WIDE_INT *val, co
   if (sgn == SIGNED && (high || needs_overflow))
     {
       unsigned HOST_WIDE_INT b;
-      if (op1[op1len-1] < 0)
+      if (wi::neg_p (op1))
 	{
 	  b = 0;
 	  for (i = 0; i < half_blocks_needed; i++)
@@ -1382,7 +1421,7 @@ wi::mul_internal (HOST_WIDE_INT *val, co
 	      b = t >> (HOST_BITS_PER_WIDE_INT - 1);
 	    }
 	}
-      if (op2[op2len-1] < 0)
+      if (wi::neg_p (op2))
 	{
 	  b = 0;
 	  for (i = 0; i < half_blocks_needed; i++)