Patchwork PR43902 patch: Widening multiply-accumulate

login
register
mail settings
Submitter Bernd Schmidt
Date June 25, 2010, 8:57 a.m.
Message ID <4C246F77.3080303@codesourcery.com>
Download mbox | patch
Permalink /patch/56894/
State New
Headers show

Comments

Bernd Schmidt - June 25, 2010, 8:57 a.m.
On 06/24/2010 11:04 PM, Richard Guenther wrote:
> On Thu, Jun 24, 2010 at 10:59 PM, Bernd Schmidt <bernds@codesourcery.com> wrote:
>> Uh, which ones are the middle-end changes?  Do we have that defined as a
>> set of files anywhere?
> 
> ;)
> 
> I meant all but the arm backend changes.  I guess I should have said so.

Oh.  Right.

Richard E., how about those?


Bernd
PR target/43902
	* config/arm/arm.md (maddsidi4, umaddsidi4): New expanders.
	(maddhisi4): Renamed from mulhisi3addsi.  Operands renumbered.
	(maddhidi4): Likewise.
	
	PR target/43902
	* gcc.target/arm/wmul-1.c: Test for smlabb instead of smulbb.
	* gcc.target/arm/wmul-3.c: New test.
	* gcc.target/arm/wmul-4.c: New test.
Richard Earnshaw - June 25, 2010, 12:39 p.m.
On Fri, 2010-06-25 at 10:57 +0200, Bernd Schmidt wrote:
> On 06/24/2010 11:04 PM, Richard Guenther wrote:
> > On Thu, Jun 24, 2010 at 10:59 PM, Bernd Schmidt <bernds@codesourcery.com> wrote:
> >> Uh, which ones are the middle-end changes?  Do we have that defined as a
> >> set of files anywhere?
> > 
> > ;)
> > 
> > I meant all but the arm backend changes.  I guess I should have said so.
> 
> Oh.  Right.
> 
> Richard E., how about those?
> 
> 
> Bernd

This is ok.

Note, there doesn't seem to me to be any benefit in marking a pair of
operands as commutative ("%" in the constraint) when all the constraints
are identical -- it just causes more work for the compiler without
giving any different code, since if it doesn't match the first way
round, it won't match the second either.

R.

Patch

Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 160997)
+++ config/arm/arm.md	(working copy)
@@ -1422,7 +1422,15 @@  (define_insn "*mulsi3subsi"
    (set_attr "predicable" "yes")]
 )
 
-;; Unnamed template to match long long multiply-accumulate (smlal)
+(define_expand "maddsidi4"
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(plus:DI
+	 (mult:DI
+	  (sign_extend:DI (match_operand:SI 1 "s_register_operand" ""))
+	  (sign_extend:DI (match_operand:SI 2 "s_register_operand" "")))
+	 (match_operand:DI 3 "s_register_operand" "")))]
+  "TARGET_32BIT && arm_arch3m"
+  "")
 
 (define_insn "*mulsidi3adddi"
   [(set (match_operand:DI 0 "s_register_operand" "=&r")
@@ -1518,7 +1526,15 @@  (define_insn "*umulsidi3_v6"
    (set_attr "predicable" "yes")]
 )
 
-;; Unnamed template to match long long unsigned multiply-accumulate (umlal)
+(define_expand "umaddsidi4"
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(plus:DI
+	 (mult:DI
+	  (zero_extend:DI (match_operand:SI 1 "s_register_operand" ""))
+	  (zero_extend:DI (match_operand:SI 2 "s_register_operand" "")))
+	 (match_operand:DI 3 "s_register_operand" "")))]
+  "TARGET_32BIT && arm_arch3m"
+  "")
 
 (define_insn "*umulsidi3adddi"
   [(set (match_operand:DI 0 "s_register_operand" "=&r")
@@ -1686,29 +1702,29 @@  (define_insn "*mulhisi3tt"
    (set_attr "predicable" "yes")]
 )
 
-(define_insn "*mulhisi3addsi"
+(define_insn "maddhisi4"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(plus:SI (match_operand:SI 1 "s_register_operand" "r")
+	(plus:SI (match_operand:SI 3 "s_register_operand" "r")
 		 (mult:SI (sign_extend:SI
-			   (match_operand:HI 2 "s_register_operand" "%r"))
+			   (match_operand:HI 1 "s_register_operand" "%r"))
 			  (sign_extend:SI
-			   (match_operand:HI 3 "s_register_operand" "r")))))]
+			   (match_operand:HI 2 "s_register_operand" "r")))))]
   "TARGET_DSP_MULTIPLY"
-  "smlabb%?\\t%0, %2, %3, %1"
+  "smlabb%?\\t%0, %1, %2, %3"
   [(set_attr "insn" "smlaxy")
    (set_attr "predicable" "yes")]
 )
 
-(define_insn "*mulhidi3adddi"
+(define_insn "*maddhidi4"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 	(plus:DI
-	  (match_operand:DI 1 "s_register_operand" "0")
+	  (match_operand:DI 3 "s_register_operand" "0")
 	  (mult:DI (sign_extend:DI
-	 	    (match_operand:HI 2 "s_register_operand" "%r"))
+	 	    (match_operand:HI 1 "s_register_operand" "%r"))
 		   (sign_extend:DI
-		    (match_operand:HI 3 "s_register_operand" "r")))))]
+		    (match_operand:HI 2 "s_register_operand" "r")))))]
   "TARGET_DSP_MULTIPLY"
-  "smlalbb%?\\t%Q0, %R0, %2, %3"
+  "smlalbb%?\\t%Q0, %R0, %1, %2"
   [(set_attr "insn" "smlalxy")
    (set_attr "predicable" "yes")])
 
Index: testsuite/gcc.target/arm/wmul-3.c
===================================================================
--- testsuite/gcc.target/arm/wmul-3.c	(revision 0)
+++ testsuite/gcc.target/arm/wmul-3.c	(revision 0)
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv6t2" } */
+
+int mac(const short *a, const short *b, int sqr, int *sum)
+{
+  int i;
+  int dotp = *sum;
+
+  for (i = 0; i < 150; i++) {
+    dotp -= b[i] * a[i];
+    sqr -= b[i] * b[i];
+  }
+
+  *sum = dotp;
+  return sqr;
+}
+
+/* { dg-final { scan-assembler-times "smulbb" 2 } } */
Index: testsuite/gcc.target/arm/wmul-4.c
===================================================================
--- testsuite/gcc.target/arm/wmul-4.c	(revision 0)
+++ testsuite/gcc.target/arm/wmul-4.c	(revision 0)
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv6t2" } */
+
+int mac(const int *a, const int *b, long long sqr, long long *sum)
+{
+  int i;
+  long long dotp = *sum;
+
+  for (i = 0; i < 150; i++) {
+    dotp += (long long) b[i] * a[i];
+    sqr += (long long) b[i] * b[i];
+  }
+
+  *sum = dotp;
+  return sqr;
+}
+
+/* { dg-final { scan-assembler-times "smlal" 2 } } */