Patchwork [AArch64] Implement SIMD Absolute Difference Instructions

login
register
mail settings
Submitter Hurugalawadi, Naveen
Date Feb. 27, 2013, 11:16 a.m.
Message ID <F3068DEED1A463459E0887A091B154933DEA4B@BY2PRD0710MB364.namprd07.prod.outlook.com>
Download mbox | patch
Permalink /patch/223582/
State New
Headers show

Comments

Hurugalawadi, Naveen - Feb. 27, 2013, 11:16 a.m.
Hi Marcus,

>> The use of TAB there is fine.  The issue is that you have trailing
>> whitespace at the end of the line, which is still present in the latest patch.

Sorry, I confused it with the spaces at the start of the pattern rather than
trailing whitespace. I have modified the patch as per your suggestion.
Please review the modified patch.

Thanks,
Naveen
Marcus Shawcroft - Feb. 27, 2013, 11:23 a.m.
On 27/02/13 11:16, Hurugalawadi, Naveen wrote:
> Hi Marcus,
>
>>> The use of TAB there is fine.  The issue is that you have trailing
>>> whitespace at the end of the line, which is still present in the latest patch.
>
> Sorry. I confused it with spaces at the start of pattern instead of
> trailing space. I have modified it as per your suggestion.
> Please review the modified patch.
>
> Thanks,
> Naveen
>

Thank you.  This is OK for stage-1.

/Marcus
James Greenhalgh - March 1, 2013, 1:51 p.m.
> >> The use of TAB there is fine.  The issue is that you have trailing
> >> whitespace at the end of the line, which is still present in the
> >> latest patch.
> 
> Sorry. I confused it with spaces at the start of pattern instead of
> trailing space. I have modified it as per your suggestion.
> Please review the modified patch.

Hi Naveen,

It looks like you didn't quite catch all of them:

> +#define DEF3a(fname, type, op) \
> +			 void  fname##_##type (pR##type a,   \
> +					       pR##type b,   \
> +					       pR##type c)   \
> +			 {                                   \
> +			   int i;                            \
> +			   for (i = 0; i < 16; i++)	     \<!tab!>
> +			     a[i] = op (b[i] - c[i]);        \
> +			 }

This one introduces a problem for the testsuite as whitespace between
a \ and the newline is an error on some compilers and a warning on GCC.

There is a script at contrib/check_GNU_style.sh which is helpful
for catching bugs like this.

Thanks,
James Greenhalgh

Patch

--- gcc/config/aarch64/aarch64-simd.md	2013-02-20 13:24:21.608042549 +0530
+++ gcc/config/aarch64/aarch64-simd.md	2013-02-27 16:34:34.203242741 +0530
@@ -44,6 +44,7 @@ 
 ; simd_dup              duplicate element.
 ; simd_dupgp            duplicate general purpose register.
 ; simd_ext              bitwise extract from pair.
+; simd_fabd             floating point absolute difference.
 ; simd_fadd             floating point add/sub.
 ; simd_fcmp             floating point compare.
 ; simd_fcvti            floating point convert to integer.
@@ -147,6 +148,7 @@ 
    simd_dup,\
    simd_dupgp,\
    simd_ext,\
+   simd_fabd,\
    simd_fadd,\
    simd_fcmp,\
    simd_fcvti,\
@@ -520,6 +522,40 @@ 
    (set_attr "simd_mode" "<MODE>")]
 )
 
+(define_insn "abd<mode>_3"
+  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+	(abs:VDQ_BHSI (minus:VDQ_BHSI
+		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
+		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
+  "TARGET_SIMD"
+  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_abd")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "aba<mode>_3"
+  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
+			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
+			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
+		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_abd")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "fabd<mode>_3"
+  [(set (match_operand:VDQF 0 "register_operand" "=w")
+	(abs:VDQF (minus:VDQF
+		   (match_operand:VDQF 1 "register_operand" "w")
+		   (match_operand:VDQF 2 "register_operand" "w"))))]
+  "TARGET_SIMD"
+  "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_fabd")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
 (define_insn "and<mode>3"
   [(set (match_operand:VDQ 0 "register_operand" "=w")
         (and:VDQ (match_operand:VDQ 1 "register_operand" "w")
--- gcc/testsuite/gcc.target/aarch64/vect.c	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect.c	2013-02-27 16:34:34.203242741 +0530
@@ -55,6 +55,8 @@  int main (void)
   int smin_vector[] = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15};
   unsigned int umax_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
   unsigned int umin_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  int sabd_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  int saba_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   int reduce_smax_value = 0;
   int reduce_smin_value = -15;
   unsigned int reduce_umax_value = 15;
@@ -81,6 +83,8 @@  int main (void)
   TEST (smin, s);
   TEST (umax, u);
   TEST (umin, u);
+  TEST (sabd, s);
+  TEST (saba, s);
   TESTV (reduce_smax, s);
   TESTV (reduce_smin, s);
   TESTV (reduce_umax, u);
--- gcc/testsuite/gcc.target/aarch64/vect-compile.c	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-compile.c	2013-02-27 16:34:34.231242741 +0530
@@ -16,5 +16,7 @@ 
 /* { dg-final { scan-assembler "uminv" } } */
 /* { dg-final { scan-assembler "smaxv" } } */
 /* { dg-final { scan-assembler "sminv" } } */
+/* { dg-final { scan-assembler "sabd" } } */
+/* { dg-final { scan-assembler "saba" } } */
 /* { dg-final { scan-assembler-times "addv" 2} } */
 /* { dg-final { scan-assembler-times "addp" 2} } */
--- gcc/testsuite/gcc.target/aarch64/vect-fp.c	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp.c	2013-02-27 16:34:34.235242743 +0530
@@ -117,6 +117,16 @@  int main (void)
 			    9.0, 10.0, 11.0, 12.0,
 			    13.0, 14.0, 15.0, 16.0 };
 
+  F32  fabd_F32_vector[] = { 1.0f, 1.0f, 1.0f, 1.0f,
+			     1.0f, 1.0f, 1.0f, 1.0f,
+			     1.0f, 1.0f, 1.0f, 1.0f,
+			     1.0f, 1.0f, 1.0f, 1.0f };
+
+  F64  fabd_F64_vector[] = { 1.0, 1.0, 1.0, 1.0,
+			     1.0, 1.0, 1.0, 1.0,
+			     1.0, 1.0, 1.0, 1.0,
+			     1.0, 1.0, 1.0, 1.0 };
+
   /* Setup input vectors.  */
   for (i=1; i<=16; i++)
     {
@@ -132,6 +142,7 @@  int main (void)
   TEST (div, 3);
   TEST (neg, 2);
   TEST (abs, 2);
+  TEST (fabd, 3);
 
   return 0;
 }
--- gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c	2013-02-27 16:34:34.235242743 +0530
@@ -11,3 +11,4 @@ 
 /* { dg-final { scan-assembler "fdiv\\tv" } } */
 /* { dg-final { scan-assembler "fneg\\tv" } } */
 /* { dg-final { scan-assembler "fabs\\tv" } } */
+/* { dg-final { scan-assembler "fabd\\tv" } } */
--- gcc/testsuite/gcc.target/aarch64/vect-fp.x	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp.x	2013-02-27 16:37:24.211243041 +0530
@@ -7,13 +7,23 @@  typedef double *__restrict__ pRF64;
 extern float fabsf (float);
 extern double fabs (double);
 
+#define DEF3a(fname, type, op) \
+			 void  fname##_##type (pR##type a,   \
+					       pR##type b,   \
+					       pR##type c)   \
+			 {                                   \
+			   int i;                            \
+			   for (i = 0; i < 16; i++)	     \	
+			     a[i] = op (b[i] - c[i]);        \
+			 }
+
 #define DEF3(fname, type, op) \
 			void  fname##_##type (pR##type a,   \
 					      pR##type b,   \
 					      pR##type c)   \
 			{				    \
 			  int i; 			    \
-			  for (i=0; i<16; i++)		    \
+			  for (i = 0; i < 16; i++)	    \
 			    a[i] = b[i] op c[i];	    \
 			}
 
@@ -22,11 +32,15 @@  extern double fabs (double);
 					     pR##type b) \
 			{				  \
 			  int i; 			  \
-			  for (i=0; i<16; i++)		  \
+			  for (i = 0; i < 16; i++)	  \
 			    a[i] = op(b[i]);		  \
 			}
 
 
+#define DEFN3a(fname, op) \
+		 DEF3a (fname, F32, op) \
+		 DEF3a (fname, F64, op)
+
 #define DEFN3(fname, op) \
 		DEF3 (fname, F32, op) \
 		DEF3 (fname, F64, op)
@@ -42,3 +56,5 @@  DEFN3 (div, /)
 DEFN2 (neg, -)
 DEF2 (abs, F32, fabsf)
 DEF2 (abs, F64, fabs)
+DEF3a (fabd, F32, fabsf)
+DEF3a (fabd, F64, fabs)
--- gcc/testsuite/gcc.target/aarch64/vect.x	2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect.x	2013-02-27 16:34:34.235242743 +0530
@@ -138,3 +138,18 @@  long long reduce_add_s64 (pRINT64 a)
 
   return s;
 }
+
+void sabd (pRINT a, pRINT b, pRINT c)
+{
+  int i;
+  for (i = 0; i < 16; i++)
+    c[i] = abs (a[i] - b[i]);
+}
+
+void saba (pRINT a, pRINT b, pRINT c)
+{
+  int i;
+  for (i = 0; i < 16; i++)
+    c[i] += abs (a[i] - b[i]);
+}
+