diff mbox

[AArch64] Implement SIMD Absolute Difference Instructions

Message ID F3068DEED1A463459E0887A091B154933A477A@BY2PRD0710MB364.namprd07.prod.outlook.com
State New
Headers show

Commit Message

Hurugalawadi, Naveen Jan. 30, 2013, 5:46 a.m. UTC
Hi,

Please find attached the patch that implements the absolute difference
instructions for the aarch64 target.
The patch modifies the testcases vect.c and vect-fp.c to check the
generated instructions and also their functionality.

Please review the patch and let me know if any modifications are
needed.

Built and tested on aarch64-thunder-elf (using Cavium's internal
simulator).

Thanks,
Naveen.H.S

gcc/

2013-01-30   Naveen H.S  <Naveen.Hurugalawadi@caviumnetworks.com>

	* config/aarch64/aarch64-simd.md (simd_fabd): New Attribute.
	(abd<mode>_3): New pattern.
	(aba<mode>_3): New pattern.
	(fabd<mode>_3): New pattern.

gcc/testsuite/

2013-01-30   Naveen H.S  <Naveen.Hurugalawadi@caviumnetworks.com>

	* gcc.target/aarch64/vect.c: Test and result vector added
	for sabd and saba instructions.
	* gcc.target/aarch64/vect-compile.c: Check for sabd and saba
	instructions in assembly.
	* gcc.target/aarch64/vect.x: Add sabd and saba test functions.
	* gcc.target/aarch64/vect-fp.c: Test and result vector added
	for fabd instruction.
	* gcc.target/aarch64/vect-fp-compile.c: Check for fabd 
	instruction in assembly.
	* gcc.target/aarch64/vect-fp.x: Add fabd test function.

Comments

Marcus Shawcroft Feb. 26, 2013, 3:23 p.m. UTC | #1
Hi Naveen,

Please accept my apologies for the delay in responding to your patch.

On 30/01/13 05:46, Hurugalawadi, Naveen wrote:


> +(define_insn "abd<mode>_3"
> +  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
> +	(abs:VDQ_BHSI (minus:VDQ_BHSI

Please drop the trailing white space, here and on the following patterns.

> --- gcc/testsuite/gcc.target/aarch64/vect-fp.x	2013-01-24 20:10:09.703833384 +0530
> +++ gcc/testsuite/gcc.target/aarch64/vect-fp.x	2013-01-29 14:11:16.909568490 +0530
> @@ -7,6 +7,16 @@ typedef double *__restrict__ pRF64;
>   extern float fabsf (float);
>   extern double fabs (double);
>
> +#define DEF3a(fname, type, op) \
> +			 void  fname##_##type (pR##type a,   \
> +					       pR##type b,   \
> +					       pR##type c)   \
> +			 {                                   \
> +			   int i;                            \
> +			   for (i=0; i<16; i++)              \

GNU style, spaces around '=' and '<'.

> --- gcc/testsuite/gcc.target/aarch64/vect.x	2013-01-24 20:10:09.703833384 +0530
> +++ gcc/testsuite/gcc.target/aarch64/vect.x	2013-01-29 18:37:32.321808454 +0530
> @@ -138,3 +138,18 @@ long long reduce_add_s64 (pRINT64 a)
>
>     return s;
>   }
> +
> +void sabd (pRINT a, pRINT b, pRINT c)
> +{
> +  int i;
> +  for (i=0;i<16;i++)
> +    c[i] = abs (a[i] - b[i]);
> +}
> +
> +void saba (pRINT a, pRINT b, pRINT c)
> +{
> +  int i;
> +  for (i=0;i<16;i++)
> +    c[i] += abs (a[i] - b[i]);
> +}
> +

GNU style please.

Cheers
/Marcus
Marcus Shawcroft Feb. 26, 2013, 3:29 p.m. UTC | #2
On 30/01/13 05:46, Hurugalawadi, Naveen wrote:
> Hi,
>
> Please find attached the patch that implements absolute difference
> instructions for aarch64 target.
> The patch modifies the testcase vect.c and vect-fp.c to check the
> generated instructions and also their functionality.
>
> Please review the patch and let me know if there should be any
> modifications?
>

>     simd_fadd,\
> +   simd_fabd,\
>     simd_fcmp,\

Alphabetical order please.

Cheers
/Marcus
diff mbox

Patch

--- gcc/config/aarch64/aarch64-simd.md	2013-01-29 11:37:04.705429514 +0530
+++ gcc/config/aarch64/aarch64-simd.md	2013-01-29 16:58:07.401718855 +0530
@@ -44,6 +44,7 @@ 
 ; simd_dup              duplicate element.
 ; simd_dupgp            duplicate general purpose register.
 ; simd_ext              bitwise extract from pair.
+; simd_fabd             floating point absolute difference.
 ; simd_fadd             floating point add/sub.
 ; simd_fcmp             floating point compare.
 ; simd_fcvti            floating point convert to integer.
@@ -148,6 +149,7 @@ 
    simd_dupgp,\
    simd_ext,\
+   simd_fabd,\
    simd_fadd,\
    simd_fcmp,\
    simd_fcvti,\
    simd_fcvtl,\
@@ -520,6 +522,44 @@ 
    (set_attr "simd_mode" "<MODE>")]
 )
 
+;; Signed integer absolute difference: op0 = |op1 - op2|.
+(define_insn "abd<mode>_3"
+  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+	(abs:VDQ_BHSI (minus:VDQ_BHSI
+		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
+		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
+  "TARGET_SIMD"
+  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_abd")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+;; Signed integer absolute difference and accumulate:
+;; op0 = op3 + |op1 - op2|, with op3 tied to op0 by the "0" constraint.
+(define_insn "aba<mode>_3"
+  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
+			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
+			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
+		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_abd")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+;; Floating-point absolute difference: op0 = |op1 - op2|.
+(define_insn "fabd<mode>_3"
+  [(set (match_operand:VDQF 0 "register_operand" "=w")
+	(abs:VDQF (minus:VDQF
+		   (match_operand:VDQF 1 "register_operand" "w")
+		   (match_operand:VDQF 2 "register_operand" "w"))))]
+  "TARGET_SIMD"
+  "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  [(set_attr "simd_type" "simd_fabd")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
 (define_insn "and<mode>3"
   [(set (match_operand:VDQ 0 "register_operand" "=w")
         (and:VDQ (match_operand:VDQ 1 "register_operand" "w")
--- gcc/testsuite/gcc.target/aarch64/vect.c	2013-01-24 20:10:09.703833384 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect.c	2013-01-30 10:30:05.089505837 +0530
@@ -55,6 +55,8 @@  int main (void)
   int smin_vector[] = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15};
   unsigned int umax_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
   unsigned int umin_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  int sabd_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  int saba_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   int reduce_smax_value = 0;
   int reduce_smin_value = -15;
   unsigned int reduce_umax_value = 15;
@@ -81,6 +83,8 @@  int main (void)
   TEST (smin, s);
   TEST (umax, u);
   TEST (umin, u);
+  TEST (sabd, s);
+  TEST (saba, s);
   TESTV (reduce_smax, s);
   TESTV (reduce_smin, s);
   TESTV (reduce_umax, u);
--- gcc/testsuite/gcc.target/aarch64/vect-compile.c	2013-01-24 20:10:09.703833384 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-compile.c	2013-01-29 14:11:16.909568490 +0530
@@ -16,5 +16,7 @@ 
 /* { dg-final { scan-assembler "uminv" } } */
 /* { dg-final { scan-assembler "smaxv" } } */
 /* { dg-final { scan-assembler "sminv" } } */
+/* { dg-final { scan-assembler "sabd" } } */
+/* { dg-final { scan-assembler "saba" } } */
 /* { dg-final { scan-assembler-times "addv" 2} } */
 /* { dg-final { scan-assembler-times "addp" 2} } */
--- gcc/testsuite/gcc.target/aarch64/vect-fp.c	2013-01-24 20:10:09.703833384 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp.c	2013-01-30 10:40:23.877491750 +0530
@@ -117,6 +117,16 @@  int main (void)
 			    9.0, 10.0, 11.0, 12.0,
 			    13.0, 14.0, 15.0, 16.0 };
 
+  F32  fabd_F32_vector[] = { 1.0f, 1.0f, 1.0f, 1.0f,
+			     1.0f, 1.0f, 1.0f, 1.0f,
+			     1.0f, 1.0f, 1.0f, 1.0f,
+			     1.0f, 1.0f, 1.0f, 1.0f };
+
+  F64  fabd_F64_vector[] = { 1.0, 1.0, 1.0, 1.0,
+			     1.0, 1.0, 1.0, 1.0,
+			     1.0, 1.0, 1.0, 1.0,
+			     1.0, 1.0, 1.0, 1.0 };
+
   /* Setup input vectors.  */
   for (i=1; i<=16; i++)
     {
@@ -132,6 +142,7 @@  int main (void)
   TEST (div, 3);
   TEST (neg, 2);
   TEST (abs, 2);
+  TEST (fabd, 3);
 
   return 0;
 }
--- gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c	2013-01-24 20:10:09.703833384 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c	2013-01-29 14:11:16.909568490 +0530
@@ -11,3 +11,4 @@ 
 /* { dg-final { scan-assembler "fdiv\\tv" } } */
 /* { dg-final { scan-assembler "fneg\\tv" } } */
 /* { dg-final { scan-assembler "fabs\\tv" } } */
+/* { dg-final { scan-assembler "fabd\\tv" } } */
--- gcc/testsuite/gcc.target/aarch64/vect-fp.x	2013-01-24 20:10:09.703833384 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp.x	2013-01-29 14:11:16.909568490 +0530
@@ -7,6 +7,18 @@  typedef double *__restrict__ pRF64;
 extern float fabsf (float);
 extern double fabs (double);
 
+/* Define FNAME_TYPE, which stores OP applied to the element-wise
+   difference B[i] - C[i] in A[i] for 16 elements.  */
+#define DEF3a(fname, type, op) \
+			 void  fname##_##type (pR##type a,   \
+					       pR##type b,   \
+					       pR##type c)   \
+			 {                                   \
+			   int i;                            \
+			   for (i = 0; i < 16; i++)          \
+			     a[i] = op (b[i] - c[i]);        \
+			 }
+
 #define DEF3(fname, type, op) \
 			void  fname##_##type (pR##type a,   \
 					      pR##type b,   \
@@ -27,6 +39,12 @@  extern double fabs (double);
 			}
 
 
+/* Instantiate DEF3a for both F32 and F64 with a single OP.  Not used by
+   fabd below, which needs fabsf/fabs per type.  */
+#define DEFN3a(fname, op) \
+		 DEF3a (fname, F32, op) \
+		 DEF3a (fname, F64, op)
+
 #define DEFN3(fname, op) \
 		DEF3 (fname, F32, op) \
 		DEF3 (fname, F64, op)
@@ -42,3 +60,5 @@  DEFN3 (div, /)
 DEFN2 (neg, -)
 DEF2 (abs, F32, fabsf)
 DEF2 (abs, F64, fabs)
+DEF3a (fabd, F32, fabsf)
+DEF3a (fabd, F64, fabs)
--- gcc/testsuite/gcc.target/aarch64/vect.x	2013-01-24 20:10:09.703833384 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect.x	2013-01-29 18:37:32.321808454 +0530
@@ -138,3 +138,19 @@  long long reduce_add_s64 (pRINT64 a)
 
   return s;
 }
+
+/* Store the element-wise absolute difference of A and B in C.  */
+void sabd (pRINT a, pRINT b, pRINT c)
+{
+  int i;
+  for (i = 0; i < 16; i++)
+    c[i] = abs (a[i] - b[i]);
+}
+
+/* Accumulate the element-wise absolute difference of A and B into C.  */
+void saba (pRINT a, pRINT b, pRINT c)
+{
+  int i;
+  for (i = 0; i < 16; i++)
+    c[i] += abs (a[i] - b[i]);
+}