diff mbox series

[AArch64] Make use of FADDP in simple reductions

Message ID VI1PR08MB33733AE5057E893A57B245FC84320@VI1PR08MB3373.eurprd08.prod.outlook.com
State New
Headers show
Series [AArch64] Make use of FADDP in simple reductions | expand

Commit Message

Elen Kalda May 8, 2019, 1:36 p.m. UTC
Hi,

This patch adds a pattern to support the FADDP (scalar) instruction.

Before the patch, the C code

typedef double v2df __attribute__((vector_size (16)));

double
foo (v2df x)
{
  return x[1] + x[0];
}

generated:
foo:
        dup     d1, v0.d[0]
        dup     d0, v0.d[1]
        fadd    d0, d1, d0
        ret

After patch:
foo:
	faddp	d0, v0.2d
	ret


Bootstrapped and done regression tests on aarch64-none-linux-gnu - 
no issues found.

Best wishes,
Elen


gcc/ChangeLog:

2019-04-24  Elen Kalda  <elen.kalda@arm.com>

	* config/aarch64/aarch64-simd.md (*aarch64_faddp<mode>): New.

gcc/testsuite/ChangeLog:

2019-04-24  Elen Kalda  <elen.kalda@arm.com>

	* gcc.target/aarch64/simd/scalar_faddp.c: New test.

Comments

Sudakshina Das May 30, 2019, 1:43 p.m. UTC | #1
Hi Elen

Thank you for doing this. You will need a maintainer's approval but I 
would like to add a couple of comments. Please find them inline.

On 08/05/2019 14:36, Elen Kalda wrote:
> Hi,
> 
> This patch adds a pattern to support the FADDP (scalar) instruction.
> 
> Before the patch, the C code
> 
> typedef double v2df __attribute__((vector_size (16)));
> 
> double
> foo (v2df x)
> {
>    return x[1] + x[0];
> }
> 
> generated:
> foo:
>          dup     d1, v0.d[0]
>          dup     d0, v0.d[1]
>          fadd    d0, d1, d0
>          ret
> 
> After patch:
> foo:
> 	faddp	d0, v0.2d
> 	ret
> 
> 
> Bootstrapped and done regression tests on aarch64-none-linux-gnu -
> no issues found.
> 
> Best wishes,
> Elen
> 
> 
> gcc/ChangeLog:
> 
> 2019-04-24  Elen Kalda  <elen.kalda@arm.com>
> 
> 	* config/aarch64/aarch64-simd.md (*aarch64_faddp<mode>): New.
> 
> gcc/testsuite/ChangeLog:
> 
> 2019-04-24  Elen Kalda  <elen.kalda@arm.com>
> 
> 	* gcc.target/aarch64/simd/scalar_faddp.c: New test.
> 

 > diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
 > index 
e3852c5d182b70978d7603225fce55c0b8ee2894..89fedc6cb3f0c6eb74c6f8d0b21cedb5ae20a095 
100644
 > --- a/gcc/config/aarch64/aarch64-simd.md
 > +++ b/gcc/config/aarch64/aarch64-simd.md
 > @@ -2372,6 +2372,21 @@
 >    [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
 >  )
 >
 > +(define_insn "*aarch64_faddp<mode>"
 > +  [(set (match_operand:<VEL> 0 "register_operand" "=w")
 > +    (plus:<VEL>
 > +      (vec_select:<VEL> (match_operand:VHSDF 1 "register_operand" "w")

I do not think the VHSDF mode should be used here. I believe you may 
have taken this from the vector form of this instruction but that seems 
to be different than the scalar one. Someone with more floating point 
instruction experience can chime in here.

 > +		(parallel[(match_operand 2 "const_int_operand" "n")]))
 > +      (vec_select:<VEL> (match_dup:VHSDF 1)
 > +		(parallel[(match_operand 3 "const_int_operand" "n")]))))]
 > +  "TARGET_SIMD
 > +  && ((INTVAL (operands[2]) == 0 && INTVAL (operands[3]) == 1)

Just some minor indentation issue. The && should be below T

 > +    || (INTVAL (operands[2]) == 1 && INTVAL (operands[3]) == 0))"

Likewise this should be below the second opening brace '('

...

 > --- /dev/null
 > +++ b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c
 > @@ -0,0 +1,31 @@
 > +/* { dg-do assemble } */

This can be dg-do compile since you only want an assembly file

 > +/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
 > +/* { dg-add-options arm_v8_2a_fp16_scalar } */
 > +/* { dg-additional-options "-save-temps -O1" } */

The --save-temps can then be removed as the dg-do compile will produce 
the .s file for you

 > +/* { dg-final { scan-assembler-not "dup" } } */
...


Thanks
Sudi
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e3852c5d182b70978d7603225fce55c0b8ee2894..89fedc6cb3f0c6eb74c6f8d0b21cedb5ae20a095 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2372,6 +2372,21 @@ 
   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
 )
 
+(define_insn "*aarch64_faddp<mode>"
+  [(set (match_operand:<VEL> 0 "register_operand" "=w")
+    (plus:<VEL>
+      (vec_select:<VEL> (match_operand:VHSDF 1 "register_operand" "w")
+		(parallel[(match_operand 2 "const_int_operand" "n")]))
+      (vec_select:<VEL> (match_dup:VHSDF 1)
+		(parallel[(match_operand 3 "const_int_operand" "n")]))))]
+  "TARGET_SIMD
+  && ((INTVAL (operands[2]) == 0 && INTVAL (operands[3]) == 1)
+    || (INTVAL (operands[2]) == 1 && INTVAL (operands[3]) == 0))"
+  "faddp\t%<Vetype>0, %1.2<Vetype>"
+  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
+)
+
+
 (define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c
new file mode 100644
index 0000000000000000000000000000000000000000..2396286d483c16c8b70b16fa08bffeb15f034a93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/scalar_faddp.c
@@ -0,0 +1,31 @@ 
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
+/* { dg-add-options arm_v8_2a_fp16_scalar } */
+/* { dg-additional-options "-save-temps -O1" } */
+/* { dg-final { scan-assembler-not "dup" } } */
+
+
+typedef double v2df __attribute__((vector_size (16)));
+typedef float v4sf __attribute__((vector_size (16)));
+typedef __fp16 v8hf __attribute__((vector_size (16)));
+
+double
+foo (v2df x)
+{
+  return x[1] + x[0];
+}
+/* { dg-final { scan-assembler-times {faddp\td[0-9]+, v[0-9]+.2d} 1 } } */
+
+float
+foo1 (v4sf x)
+{
+  return x[0] + x[1];
+}
+/* { dg-final { scan-assembler-times {faddp\ts[0-9]+, v[0-9]+.2s} 1 } } */
+
+__fp16
+foo2 (v8hf x)
+{
+  return x[0] + x[1];
+}
+/* { dg-final { scan-assembler-times {faddp\th[0-9]+, v[0-9]+.2h} 1 } } */