Patchwork [simplify-rtx] Fix 16-bit -> 64-bit multiply and accumulate

login
register
mail settings
Submitter Andrew Stubbs
Date May 26, 2011, 1:35 p.m.
Message ID <4DDE572A.6060305@codesourcery.com>
Download mbox | patch
Permalink /patch/97575/
State New
Headers show

Comments

Andrew Stubbs - May 26, 2011, 1:35 p.m.
On 25/05/11 14:47, Joseph S. Myers wrote:
> The shift must be by a positive constant amount, strictly less than the
> precision (GET_MODE_PRECISION) of the mode (of the value being shifted).
> If that applies, the relevant number of bits is the precision of the mode
> minus the number of bits of the shift.  For an extension, just take the
> number of bits in the inner mode.  Add the two numbers of bits; if the
> result does not exceed the number of bits in the mode (of the operands and
> the multiplication) then the multiplication won't overflow.

I believe the attached should implement what you describe.

Is the patch OK now?

Andrew
Joseph S. Myers - May 26, 2011, 1:56 p.m.
On Thu, 26 May 2011, Andrew Stubbs wrote:

> On 25/05/11 14:47, Joseph S. Myers wrote:
> > The shift must be by a positive constant amount, strictly less than the
> > precision (GET_MODE_PRECISION) of the mode (of the value being shifted).
> > If that applies, the relevant number of bits is the precision of the mode
> > minus the number of bits of the shift.  For an extension, just take the
> > number of bits in the inner mode.  Add the two numbers of bits; if the
> > result does not exceed the number of bits in the mode (of the operands and
> > the multiplication) then the multiplication won't overflow.
> 
> I believe the attached should implement what you describe.
> 
> Is the patch OK now?

It appears to implement the logic I described, so I have no further 
comments on it (but can't review it).
Richard Earnshaw - June 2, 2011, 9:46 a.m.
On Thu, 2011-05-26 at 14:35 +0100, Andrew Stubbs wrote:
> On 25/05/11 14:47, Joseph S. Myers wrote:
> > The shift must be by a positive constant amount, strictly less than the
> > precision (GET_MODE_PRECISION) of the mode (of the value being shifted).
> > If that applies, the relevant number of bits is the precision of the mode
> > minus the number of bits of the shift.  For an extension, just take the
> > number of bits in the inner mode.  Add the two numbers of bits; if the
> > result does not exceed the number of bits in the mode (of the operands and
> > the multiplication) then the multiplication won't overflow.
> 
> I believe the attached should implement what you describe.
> 
> Is the patch OK now?
> 
> Andrew

OK.

R.
Andrew Stubbs - June 7, 2011, 11:05 a.m.
On 02/06/11 10:46, Richard Earnshaw wrote:
> OK.

Committed, thanks.

Andrew

Patch

2011-05-26  Bernd Schmidt  <bernds@codesourcery.com>
	    Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* simplify-rtx.c (simplify_unary_operation_1): Canonicalize widening
	multiplies.
	* doc/md.texi (Canonicalization of Instructions): Document widening
	multiply canonicalization.

	gcc/testsuite/
	* gcc.target/arm/mla-2.c: New test.

--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5840,6 +5840,23 @@  Equality comparisons of a group of bits (usually a single bit) with zero
 will be written using @code{zero_extract} rather than the equivalent
 @code{and} or @code{sign_extract} operations.
 
+@cindex @code{mult}, canonicalization of
+@item
+@code{(sign_extend:@var{m1} (mult:@var{m2} (sign_extend:@var{m2} @var{x})
+(sign_extend:@var{m2} @var{y})))} is converted to @code{(mult:@var{m1}
+(sign_extend:@var{m1} @var{x}) (sign_extend:@var{m1} @var{y}))}, and likewise
+for @code{zero_extend}.
+
+@item
+@code{(sign_extend:@var{m1} (mult:@var{m2} (ashiftrt:@var{m2}
+@var{x} @var{s}) (sign_extend:@var{m2} @var{y})))} is converted
+to @code{(mult:@var{m1} (sign_extend:@var{m1} (ashiftrt:@var{m2}
+@var{x} @var{s})) (sign_extend:@var{m1} @var{y}))}, and likewise for
+patterns using @code{zero_extend} and @code{lshiftrt}.  If the second
+operand of @code{mult} is also a shift, then that is extended also.
+This transformation is only applied when it can be proven that the
+original operation had sufficient precision to prevent overflow.
+
 @end itemize
 
 Further canonicalization rules are defined in the function
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -1000,6 +1000,48 @@  simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	  && GET_CODE (XEXP (XEXP (op, 0), 1)) == LABEL_REF)
 	return XEXP (op, 0);
 
+      /* Extending a widening multiplication should be canonicalized to
+	 a wider widening multiplication.  */
+      if (GET_CODE (op) == MULT)
+	{
+	  rtx lhs = XEXP (op, 0);
+	  rtx rhs = XEXP (op, 1);
+	  enum rtx_code lcode = GET_CODE (lhs);
+	  enum rtx_code rcode = GET_CODE (rhs);
+
+	  /* Widening multiplies usually extend both operands, but sometimes
+	     they use a shift to extract a portion of a register.  */
+	  if ((lcode == SIGN_EXTEND
+	       || (lcode == ASHIFTRT && CONST_INT_P (XEXP (lhs, 1))))
+	      && (rcode == SIGN_EXTEND
+		  || (rcode == ASHIFTRT && CONST_INT_P (XEXP (rhs, 1)))))
+	    {
+	      enum machine_mode lmode = GET_MODE (lhs);
+	      enum machine_mode rmode = GET_MODE (rhs);
+	      int bits;
+
+	      if (lcode == ASHIFTRT)
+		/* Number of bits not shifted off the end.  */
+		bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1));
+	      else /* lcode == SIGN_EXTEND */
+		/* Size of inner mode.  */
+		bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0)));
+
+	      if (rcode == ASHIFTRT)
+		bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1));
+	      else /* rcode == SIGN_EXTEND */
+		bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0)));
+
+	      /* We can only widen multiplies if the result is mathematiclly
+		 equivalent.  I.e. if overflow was impossible.  */
+	      if (bits <= GET_MODE_PRECISION (GET_MODE (op)))
+		return simplify_gen_binary
+			 (MULT, mode,
+			  simplify_gen_unary (SIGN_EXTEND, mode, lhs, lmode),
+			  simplify_gen_unary (SIGN_EXTEND, mode, rhs, rmode));
+	    }
+	}
+
       /* Check for a sign extension of a subreg of a promoted
 	 variable, where the promotion is sign-extended, and the
 	 target mode is the same as the variable's promotion.  */
@@ -1071,6 +1113,48 @@  simplify_unary_operation_1 (enum rtx_code code, enum machine_mode mode, rtx op)
 	  && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
 	return rtl_hooks.gen_lowpart_no_emit (mode, op);
 
+      /* Extending a widening multiplication should be canonicalized to
+	 a wider widening multiplication.  */
+      if (GET_CODE (op) == MULT)
+	{
+	  rtx lhs = XEXP (op, 0);
+	  rtx rhs = XEXP (op, 1);
+	  enum rtx_code lcode = GET_CODE (lhs);
+	  enum rtx_code rcode = GET_CODE (rhs);
+
+	  /* Widening multiplies usually extend both operands, but sometimes
+	     they use a shift to extract a portion of a register.  */
+	  if ((lcode == ZERO_EXTEND
+	       || (lcode == LSHIFTRT && CONST_INT_P (XEXP (lhs, 1))))
+	      && (rcode == ZERO_EXTEND
+		  || (rcode == LSHIFTRT && CONST_INT_P (XEXP (rhs, 1)))))
+	    {
+	      enum machine_mode lmode = GET_MODE (lhs);
+	      enum machine_mode rmode = GET_MODE (rhs);
+	      int bits;
+
+	      if (lcode == LSHIFTRT)
+		/* Number of bits not shifted off the end.  */
+		bits = GET_MODE_PRECISION (lmode) - INTVAL (XEXP (lhs, 1));
+	      else /* lcode == ZERO_EXTEND */
+		/* Size of inner mode.  */
+		bits = GET_MODE_PRECISION (GET_MODE (XEXP (lhs, 0)));
+
+	      if (rcode == LSHIFTRT)
+		bits += GET_MODE_PRECISION (rmode) - INTVAL (XEXP (rhs, 1));
+	      else /* rcode == ZERO_EXTEND */
+		bits += GET_MODE_PRECISION (GET_MODE (XEXP (rhs, 0)));
+
+	      /* We can only widen multiplies if the result is mathematiclly
+		 equivalent.  I.e. if overflow was impossible.  */
+	      if (bits <= GET_MODE_PRECISION (GET_MODE (op)))
+		return simplify_gen_binary
+			 (MULT, mode,
+			  simplify_gen_unary (ZERO_EXTEND, mode, lhs, lmode),
+			  simplify_gen_unary (ZERO_EXTEND, mode, rhs, rmode));
+	    }
+	}
+
       /* (zero_extend:M (zero_extend:N <X>)) is (zero_extend:M <X>).  */
       if (GET_CODE (op) == ZERO_EXTEND)
 	return simplify_gen_unary (ZERO_EXTEND, mode, XEXP (op, 0),
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mla-2.c
@@ -0,0 +1,9 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long foolong (long long x, short *a, short *b)
+{
+    return x + *a * *b;
+}
+
+/* { dg-final { scan-assembler "smlalbb" } } */