diff mbox

PR43902 patch: Widening multiply-accumulate

Message ID 4C246F77.3080303@codesourcery.com
State New
Headers show

Commit Message

Bernd Schmidt June 25, 2010, 8:57 a.m. UTC
On 06/24/2010 11:04 PM, Richard Guenther wrote:
> On Thu, Jun 24, 2010 at 10:59 PM, Bernd Schmidt <bernds@codesourcery.com> wrote:
>> Uh, which ones are the middle-end changes?  Do we have that defined as a
>> set of files anywhere?
> 
> ;)
> 
> I meant all but the arm backend changes.  I guess I should have said so.

Oh.  Right.

Richard E., how about those?


Bernd
PR target/43902
	* config/arm/arm.md (maddsidi4, umaddsidi4): New expanders.
	(maddhisi4): Renamed from mulhisi3addsi.  Operands renumbered.
	(maddhidi4): Likewise.
	
	PR target/43902
	* gcc.target/arm/wmul-1.c: Test for smlabb instead of smulbb.
	* gcc.target/arm/wmul-3.c: New test.
	* gcc.target/arm/wmul-4.c: New test.

Comments

Richard Earnshaw June 25, 2010, 12:39 p.m. UTC | #1
On Fri, 2010-06-25 at 10:57 +0200, Bernd Schmidt wrote:
> On 06/24/2010 11:04 PM, Richard Guenther wrote:
> > On Thu, Jun 24, 2010 at 10:59 PM, Bernd Schmidt <bernds@codesourcery.com> wrote:
> >> Uh, which ones are the middle-end changes?  Do we have that defined as a
> >> set of files anywhere?
> > 
> > ;)
> > 
> > I meant all but the arm backend changes.  I guess I should have said so.
> 
> Oh.  Right.
> 
> Richard E., how about those?
> 
> 
> Bernd

This is ok.

Note, there doesn't seem to me to be any benefit in marking a pair of
operands as commutative ("%" in the constraint) when all the constraints
are identical -- it just causes more work for the compiler without
giving any different code, since if it doesn't match the first way
round, it won't match the second either.

R.
diff mbox

Patch

Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 160997)
+++ config/arm/arm.md	(working copy)
@@ -1422,7 +1422,15 @@  (define_insn "*mulsi3subsi"
    (set_attr "predicable" "yes")]
 )
 
-;; Unnamed template to match long long multiply-accumulate (smlal)
+(define_expand "maddsidi4"
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(plus:DI
+	 (mult:DI
+	  (sign_extend:DI (match_operand:SI 1 "s_register_operand" ""))
+	  (sign_extend:DI (match_operand:SI 2 "s_register_operand" "")))
+	 (match_operand:DI 3 "s_register_operand" "")))]
+  "TARGET_32BIT && arm_arch3m"
+  "")
 
 (define_insn "*mulsidi3adddi"
   [(set (match_operand:DI 0 "s_register_operand" "=&r")
@@ -1518,7 +1526,15 @@  (define_insn "*umulsidi3_v6"
    (set_attr "predicable" "yes")]
 )
 
-;; Unnamed template to match long long unsigned multiply-accumulate (umlal)
+(define_expand "umaddsidi4"
+  [(set (match_operand:DI 0 "s_register_operand" "")
+	(plus:DI
+	 (mult:DI
+	  (zero_extend:DI (match_operand:SI 1 "s_register_operand" ""))
+	  (zero_extend:DI (match_operand:SI 2 "s_register_operand" "")))
+	 (match_operand:DI 3 "s_register_operand" "")))]
+  "TARGET_32BIT && arm_arch3m"
+  "")
 
 (define_insn "*umulsidi3adddi"
   [(set (match_operand:DI 0 "s_register_operand" "=&r")
@@ -1686,29 +1702,29 @@  (define_insn "*mulhisi3tt"
    (set_attr "predicable" "yes")]
 )
 
-(define_insn "*mulhisi3addsi"
+(define_insn "maddhisi4"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(plus:SI (match_operand:SI 1 "s_register_operand" "r")
+	(plus:SI (match_operand:SI 3 "s_register_operand" "r")
 		 (mult:SI (sign_extend:SI
-			   (match_operand:HI 2 "s_register_operand" "%r"))
+			   (match_operand:HI 1 "s_register_operand" "%r"))
 			  (sign_extend:SI
-			   (match_operand:HI 3 "s_register_operand" "r")))))]
+			   (match_operand:HI 2 "s_register_operand" "r")))))]
   "TARGET_DSP_MULTIPLY"
-  "smlabb%?\\t%0, %2, %3, %1"
+  "smlabb%?\\t%0, %1, %2, %3"
   [(set_attr "insn" "smlaxy")
    (set_attr "predicable" "yes")]
 )
 
-(define_insn "*mulhidi3adddi"
+(define_insn "*maddhidi4"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 	(plus:DI
-	  (match_operand:DI 1 "s_register_operand" "0")
+	  (match_operand:DI 3 "s_register_operand" "0")
 	  (mult:DI (sign_extend:DI
-	 	    (match_operand:HI 2 "s_register_operand" "%r"))
+	 	    (match_operand:HI 1 "s_register_operand" "%r"))
 		   (sign_extend:DI
-		    (match_operand:HI 3 "s_register_operand" "r")))))]
+		    (match_operand:HI 2 "s_register_operand" "r")))))]
   "TARGET_DSP_MULTIPLY"
-  "smlalbb%?\\t%Q0, %R0, %2, %3"
+  "smlalbb%?\\t%Q0, %R0, %1, %2"
   [(set_attr "insn" "smlalxy")
    (set_attr "predicable" "yes")])
 
Index: testsuite/gcc.target/arm/wmul-3.c
===================================================================
--- testsuite/gcc.target/arm/wmul-3.c	(revision 0)
+++ testsuite/gcc.target/arm/wmul-3.c	(revision 0)
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv6t2" } */
+
+int mac(const short *a, const short *b, int sqr, int *sum)
+{
+  int i;
+  int dotp = *sum;
+
+  for (i = 0; i < 150; i++) {
+    dotp -= b[i] * a[i];
+    sqr -= b[i] * b[i];
+  }
+
+  *sum = dotp;
+  return sqr;
+}
+
+/* { dg-final { scan-assembler-times "smulbb" 2 } } */
Index: testsuite/gcc.target/arm/wmul-4.c
===================================================================
--- testsuite/gcc.target/arm/wmul-4.c	(revision 0)
+++ testsuite/gcc.target/arm/wmul-4.c	(revision 0)
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv6t2" } */
+
+int mac(const int *a, const int *b, long long sqr, long long *sum)
+{
+  int i;
+  long long dotp = *sum;
+
+  for (i = 0; i < 150; i++) {
+    dotp += (long long) b[i] * a[i];
+    sqr += (long long) b[i] * b[i];
+  }
+
+  *sum = dotp;
+  return sqr;
+}
+
+/* { dg-final { scan-assembler-times "smlal" 2 } } */