[AArch64] Implement SIMD Absolute Difference Instructions

Submitted by Hurugalawadi, Naveen on Feb. 27, 2013, 10:42 a.m.

Details

Message ID F3068DEED1A463459E0887A091B154933DEA37@BY2PRD0710MB364.namprd07.prod.outlook.com
State New
Headers show

Commit Message

Hurugalawadi, Naveen Feb. 27, 2013, 10:42 a.m.
Hi Marcus,

I had a "TAB" inserted as the pattern was starting at the 9th column.
Hence, it showed trailing whites paces in the patch.

It has been modified according to the earlier patterns in the same file.

Please find attached the modified patch as per your suggestions.

Thanks,
Naveen

Comments

Marcus Shawcroft Feb. 27, 2013, 10:58 a.m.
On 27/02/13 10:42, Hurugalawadi, Naveen wrote:
> Hi Marcus,
>
> I had a "TAB" inserted as the pattern was starting at the 9th column.
> Hence, it showed trailing whites paces in the patch.

The use of TAB there is fine.  The issue is that you have trail white 
space at the end of the line, which is still present in the latest patch.

/M

Patch hide | download patch | download mbox

--- gcc/config/aarch64/aarch64-simd.md	2013-02-20 13:24:21.608042549 +0530
+++ gcc/config/aarch64/aarch64-simd.md	2013-02-27 16:04:24.839239567 +0530
@@ -44,6 +44,7 @@ 
 ; simd_dup              duplicate element.
 ; simd_dupgp            duplicate general purpose register.
 ; simd_ext              bitwise extract from pair.
+; simd_fabd             floating absolute difference and accumulate.
 ; simd_fadd             floating point add/sub.
 ; simd_fcmp             floating point compare.
 ; simd_fcvti            floating point convert to integer.
@@ -147,6 +148,7 @@ 
    simd_dup,\
    simd_dupgp,\
    simd_ext,\
+   simd_fabd,\
    simd_fadd,\
    simd_fcmp,\
    simd_fcvti,\
@@ -520,6 +522,40 @@ 
    (set_attr "simd_mode" "<MODE>")]
 )
 
+(define_insn "abd<mode>_3"
+  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+        (abs:VDQ_BHSI (minus:VDQ_BHSI 
+		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
+		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
+  "TARGET_SIMD"
+  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_abd")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "aba<mode>_3"
+  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI 
+			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
+			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
+		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_abd")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "fabd<mode>_3"
+  [(set (match_operand:VDQF 0 "register_operand" "=w")
+        (abs:VDQF (minus:VDQF 
+		   (match_operand:VDQF 1 "register_operand" "w")
+		   (match_operand:VDQF 2 "register_operand" "w"))))]
+  "TARGET_SIMD"
+  "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_fabd")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
 (define_insn "and<mode>3"
   [(set (match_operand:VDQ 0 "register_operand" "=w")
         (and:VDQ (match_operand:VDQ 1 "register_operand" "w")
--- gcc/testsuite/gcc.target/aarch64/vect.c	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect.c	2013-02-27 15:55:57.855238677 +0530
@@ -55,6 +55,8 @@  int main (void)
   int smin_vector[] = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15};
   unsigned int umax_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
   unsigned int umin_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  int sabd_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  int saba_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   int reduce_smax_value = 0;
   int reduce_smin_value = -15;
   unsigned int reduce_umax_value = 15;
@@ -81,6 +83,8 @@  int main (void)
   TEST (smin, s);
   TEST (umax, u);
   TEST (umin, u);
+  TEST (sabd, s);
+  TEST (saba, s);
   TESTV (reduce_smax, s);
   TESTV (reduce_smin, s);
   TESTV (reduce_umax, u);
--- gcc/testsuite/gcc.target/aarch64/vect-compile.c	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-compile.c	2013-02-27 15:55:57.855238677 +0530
@@ -16,5 +16,7 @@ 
 /* { dg-final { scan-assembler "uminv" } } */
 /* { dg-final { scan-assembler "smaxv" } } */
 /* { dg-final { scan-assembler "sminv" } } */
+/* { dg-final { scan-assembler "sabd" } } */
+/* { dg-final { scan-assembler "saba" } } */
 /* { dg-final { scan-assembler-times "addv" 2} } */
 /* { dg-final { scan-assembler-times "addp" 2} } */
--- gcc/testsuite/gcc.target/aarch64/vect-fp.c	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp.c	2013-02-27 15:55:57.855238677 +0530
@@ -117,6 +117,16 @@  int main (void)
 			    9.0, 10.0, 11.0, 12.0,
 			    13.0, 14.0, 15.0, 16.0 };
 
+  F32  fabd_F32_vector[] = { 1.0f, 1.0f, 1.0f, 1.0f,
+			     1.0f, 1.0f, 1.0f, 1.0f,
+			     1.0f, 1.0f, 1.0f, 1.0f,
+			     1.0f, 1.0f, 1.0f, 1.0f };
+
+  F64  fabd_F64_vector[] = { 1.0, 1.0, 1.0, 1.0,
+			     1.0, 1.0, 1.0, 1.0,
+			     1.0, 1.0, 1.0, 1.0,
+			     1.0, 1.0, 1.0, 1.0 };
+
   /* Setup input vectors.  */
   for (i=1; i<=16; i++)
     {
@@ -132,6 +142,7 @@  int main (void)
   TEST (div, 3);
   TEST (neg, 2);
   TEST (abs, 2);
+  TEST (fabd, 3);
 
   return 0;
 }
--- gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c	2013-02-27 15:55:57.855238677 +0530
@@ -11,3 +11,4 @@ 
 /* { dg-final { scan-assembler "fdiv\\tv" } } */
 /* { dg-final { scan-assembler "fneg\\tv" } } */
 /* { dg-final { scan-assembler "fabs\\tv" } } */
+/* { dg-final { scan-assembler "fabd\\tv" } } */
--- gcc/testsuite/gcc.target/aarch64/vect-fp.x	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp.x	2013-02-27 15:55:57.855238677 +0530
@@ -7,13 +7,23 @@  typedef double *__restrict__ pRF64;
 extern float fabsf (float);
 extern double fabs (double);
 
+#define DEF3a(fname, type, op) \
+			 void  fname##_##type (pR##type a,   \
+					       pR##type b,   \
+					       pR##type c)   \
+			 {                                   \
+			   int i;                            \
+			   for (i = 0; i < 16; i++)              \
+			     a[i] = op (b[i] - c[i]);        \
+			 }
+
 #define DEF3(fname, type, op) \
 			void  fname##_##type (pR##type a,   \
 					      pR##type b,   \
 					      pR##type c)   \
 			{				    \
 			  int i; 			    \
-			  for (i=0; i<16; i++)		    \
+			  for (i = 0; i < 16; i++)		    \
 			    a[i] = b[i] op c[i];	    \
 			}
 
@@ -22,11 +32,15 @@  extern double fabs (double);
 					     pR##type b) \
 			{				  \
 			  int i; 			  \
-			  for (i=0; i<16; i++)		  \
+			  for (i = 0; i < 16; i++)		  \
 			    a[i] = op(b[i]);		  \
 			}
 
 
+#define DEFN3a(fname, op) \
+		 DEF3a (fname, F32, op) \
+		 DEF3a (fname, F64, op)
+
 #define DEFN3(fname, op) \
 		DEF3 (fname, F32, op) \
 		DEF3 (fname, F64, op)
@@ -42,3 +56,5 @@  DEFN3 (div, /)
 DEFN2 (neg, -)
 DEF2 (abs, F32, fabsf)
 DEF2 (abs, F64, fabs)
+DEF3a (fabd, F32, fabsf)
+DEF3a (fabd, F64, fabs)
--- gcc/testsuite/gcc.target/aarch64/vect.x	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect.x	2013-02-27 15:55:57.855238677 +0530
@@ -138,3 +138,18 @@  long long reduce_add_s64 (pRINT64 a)
 
   return s;
 }
+
+void sabd (pRINT a, pRINT b, pRINT c)
+{
+  int i;
+  for (i = 0; i < 16; i++)
+    c[i] = abs (a[i] - b[i]);
+}
+
+void saba (pRINT a, pRINT b, pRINT c)
+{
+  int i;
+  for (i = 0; i < 16; i++)
+    c[i] += abs (a[i] - b[i]);
+}
+