diff mbox series

aarch64: Fix up bfmlal lane pattern [PR104921]

Message ID Y+D7p8r66hgkCR1y@arm.com
State New
Headers show
Series aarch64: Fix up bfmlal lane pattern [PR104921] | expand

Commit Message

Alex Coplan Feb. 6, 2023, 1:07 p.m. UTC
Hi,

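As the testcase shows, this pattern had an incorrect constraint on
operand 3 (the lane operand): "w" allows any FP/SIMD register, so GCC
could allocate a register above v15 there, leading to GCC's output
getting rejected by the assembler.

This patch fixes the constraint accordingly by using "x", which
restricts the operand to v0-v15.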

The test is split into two: one that can run without bf16 support from
the assembler and another that checks that the output actually assembles
when such support is available.

Bootstrapped/regtested on aarch64-linux-gnu.

OK for GCC 13? Or better to wait for next stage 1? What about backports?

Thanks,
Alex

gcc/ChangeLog:

	PR target/104921
	* config/aarch64/aarch64-simd.md (aarch64_bfmlal<bt>_lane<q>v4sf):
	Use correct constraint for operand 3.

gcc/testsuite/ChangeLog:

	PR target/104921
	* gcc.target/aarch64/pr104921-1.c: New test.
	* gcc.target/aarch64/pr104921-2.c: New test.
	* gcc.target/aarch64/pr104921.x: Include file for new tests.

Comments

Richard Sandiford Feb. 6, 2023, 1:51 p.m. UTC | #1
Alex Coplan <alex.coplan@arm.com> writes:
> Hi,
>
> As the testcase shows, this pattern had an incorrect constraint leading
> to GCC's output getting rejected by the assembler.
>
> This patch fixes the constraint accordingly.
>
> The test is split into two: one that can run without bf16 support from
> the assembler and another that checks that the output actually assembles
> when such support is available.
>
> Bootstrapped/regtested on aarch64-linux-gnu.
>
> OK for GCC 13? Or better to wait for next stage 1? What about backports?

OK for GCC 13 & backports, thanks.

Richard
>
> Thanks,
> Alex
>
> gcc/ChangeLog:
>
> 	PR target/104921
> 	* config/aarch64/aarch64-simd.md (aarch64_bfmlal<bt>_lane<q>v4sf):
> 	Use correct constraint for operand 3.
>
> gcc/testsuite/ChangeLog:
>
> 	PR target/104921
> 	* gcc.target/aarch64/pr104921-1.c: New test.
> 	* gcc.target/aarch64/pr104921-2.c: New test.
> 	* gcc.target/aarch64/pr104921.x: Include file for new tests.
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 7f212bf37cd..dd5eed387f2 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -9153,7 +9153,7 @@ (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
>    [(set (match_operand:V4SF 0 "register_operand" "=w")
>          (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
>                      (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
> -                                  (match_operand:VBF 3 "register_operand" "w")
> +                                  (match_operand:VBF 3 "register_operand" "x")
>                                    (match_operand:SI 4 "const_int_operand" "n")]
>                       BF_MLA)))]
>    "TARGET_BF16_SIMD"
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr104921-1.c b/gcc/testsuite/gcc.target/aarch64/pr104921-1.c
> new file mode 100644
> index 00000000000..dcf6fe7d90d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr104921-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -march=armv8.2-a+bf16 -std=gnu99 -save-temps" }  */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include "pr104921.x"
> +
> +/*
> +**foo:
> +**	mov	v([0-9]|1[0-5])\.8b, v16\.8b
> +**	bfmlalb	v0\.4s, v1\.8h, v([0-9]|1[0-5])\.h\[0\]
> +**	ret
> +*/
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr104921-2.c b/gcc/testsuite/gcc.target/aarch64/pr104921-2.c
> new file mode 100644
> index 00000000000..211fcd0aca9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr104921-2.c
> @@ -0,0 +1,6 @@
> +/* { dg-do assemble } */
> +/* { dg-add-options arm_v8_2a_bf16_neon }  */
> +/* { dg-additional-options "-O2 -std=gnu99" }  */
> +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
> +
> +#include "pr104921.x"
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr104921.x b/gcc/testsuite/gcc.target/aarch64/pr104921.x
> new file mode 100644
> index 00000000000..1e1a6f24e22
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr104921.x
> @@ -0,0 +1,9 @@
> +#include <arm_neon.h>
> +
> +float32x4_t
> +foo(float32x4_t x, bfloat16x8_t a)
> +{
> +  register bfloat16x4_t b asm ("v16");
> +  asm volatile ("" : "=w"(b));
> +  return vbfmlalbq_lane_f32 (x, a, b, 0);
> +}
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 7f212bf37cd..dd5eed387f2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9153,7 +9153,7 @@  (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=w")
         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
-                                  (match_operand:VBF 3 "register_operand" "w")
+                                  (match_operand:VBF 3 "register_operand" "x")
                                   (match_operand:SI 4 "const_int_operand" "n")]
                      BF_MLA)))]
   "TARGET_BF16_SIMD"
diff --git a/gcc/testsuite/gcc.target/aarch64/pr104921-1.c b/gcc/testsuite/gcc.target/aarch64/pr104921-1.c
new file mode 100644
index 00000000000..dcf6fe7d90d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr104921-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -march=armv8.2-a+bf16 -std=gnu99 -save-temps" }  */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "pr104921.x"
+
+/*
+**foo:
+**	mov	v([0-9]|1[0-5])\.8b, v16\.8b
+**	bfmlalb	v0\.4s, v1\.8h, v([0-9]|1[0-5])\.h\[0\]
+**	ret
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/pr104921-2.c b/gcc/testsuite/gcc.target/aarch64/pr104921-2.c
new file mode 100644
index 00000000000..211fcd0aca9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr104921-2.c
@@ -0,0 +1,6 @@ 
+/* { dg-do assemble } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+/* { dg-additional-options "-O2 -std=gnu99" }  */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+
+#include "pr104921.x"
diff --git a/gcc/testsuite/gcc.target/aarch64/pr104921.x b/gcc/testsuite/gcc.target/aarch64/pr104921.x
new file mode 100644
index 00000000000..1e1a6f24e22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr104921.x
@@ -0,0 +1,9 @@ 
+#include <arm_neon.h>
+
+float32x4_t
+foo(float32x4_t x, bfloat16x8_t a)
+{
+  register bfloat16x4_t b asm ("v16");
+  asm volatile ("" : "=w"(b));
+  return vbfmlalbq_lane_f32 (x, a, b, 0);
+}