Message ID | f8021191-7ec1-8683-5a33-7cf91172cb42@arm.com |
---|---|
State | New |
Headers | show |
Series | arm: Fix MVE vcreate definition | expand |
> -----Original Message----- > From: Stam Markianos-Wright <Stam.Markianos-Wright@arm.com> > Sent: Wednesday, March 29, 2023 11:50 AM > To: gcc-patches@gcc.gnu.org > Cc: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > Subject: arm: Fix MVE vcreate definition > > Hi all, > > I just found a bug that goes back to the initial merge of > the MVE backend: The vcreate intrinsic has had it's vector > lanes mixed up, compared to what was intended (as per > the ACLE) definition. This is also a discrepancy with clang: > https://godbolt.org/z/4n93e5aqj > > This patches simply switches the operands around and > makes the tests more specific on the input registers > (I do not touch the output Q regs as they vary based > on softfp/hardfp or the input registers when the input > is a constant, since, in that case, a single register > is loaded with a constant and then the same register is > used twice as "vmov q0[2], q0[0], r2, r2" and the reg > num might also not always be guaranteed). > > No regressions on MVE tesctsuite configurations or in > the CMSIS-NN testsuite. > > Ok for trunk? (Despite this being late in Stage 4, sorry > about that!) Ok, since this is a wrong-code fix. This needs backports as well, right? Thanks, Kyrill > > Thanks, > Stamatis Markianos-Wright > > gcc/ChangeLog: > > * config/arm/mve.md (mve_vcvtq_n_to_f_<supf><mode>): Swap > operands. > (mve_vcreateq_f<mode>): Swap operands. > > gcc/testsuite/ChangeLog: > > * gcc.target/arm/mve/intrinsics/vcreateq_f16.c: Tighten test. > * gcc.target/arm/mve/intrinsics/vcreateq_f32.c: Tighten test. > * gcc.target/arm/mve/intrinsics/vcreateq_s16.c: Tighten test. > * gcc.target/arm/mve/intrinsics/vcreateq_s32.c: Tighten test. > * gcc.target/arm/mve/intrinsics/vcreateq_s64.c: Tighten test. > * gcc.target/arm/mve/intrinsics/vcreateq_s8.c: Tighten test. > * gcc.target/arm/mve/intrinsics/vcreateq_u16.c: Tighten test. > * gcc.target/arm/mve/intrinsics/vcreateq_u32.c: Tighten test. > * gcc.target/arm/mve/intrinsics/vcreateq_u64.c: Tighten test. > * gcc.target/arm/mve/intrinsics/vcreateq_u8.c: Tighten test.
On 29/03/2023 13:16, Kyrylo Tkachov wrote: >> -----Original Message----- >> From: Stam Markianos-Wright<Stam.Markianos-Wright@arm.com> >> Sent: Wednesday, March 29, 2023 11:50 AM >> To:gcc-patches@gcc.gnu.org >> Cc: Kyrylo Tkachov<Kyrylo.Tkachov@arm.com> >> Subject: arm: Fix MVE vcreate definition >> >> Hi all, >> >> I just found a bug that goes back to the initial merge of >> the MVE backend: The vcreate intrinsic has had it's vector >> lanes mixed up, compared to what was intended (as per >> the ACLE) definition. This is also a discrepancy with clang: >> https://godbolt.org/z/4n93e5aqj >> >> This patches simply switches the operands around and >> makes the tests more specific on the input registers >> (I do not touch the output Q regs as they vary based >> on softfp/hardfp or the input registers when the input >> is a constant, since, in that case, a single register >> is loaded with a constant and then the same register is >> used twice as "vmov q0[2], q0[0], r2, r2" and the reg >> num might also not always be guaranteed). >> >> No regressions on MVE tesctsuite configurations or in >> the CMSIS-NN testsuite. >> >> Ok for trunk? (Despite this being late in Stage 4, sorry >> about that!) > Ok, since this is a wrong-code fix. Thanks, applied as: 3f0ca7a3e4431534bff3b8eb73709cc822e489b0. > This needs backports as well, right? Indeed! I'm building up a larger list of commits that we're hoping to backport, so I will include this on that list. > Thanks, > Kyrill > >> Thanks, >> Stamatis Markianos-Wright >> >> gcc/ChangeLog: >> >> * config/arm/mve.md (mve_vcvtq_n_to_f_<supf><mode>): Swap >> operands. >> (mve_vcreateq_f<mode>): Swap operands. >> >> gcc/testsuite/ChangeLog: >> >> * gcc.target/arm/mve/intrinsics/vcreateq_f16.c: Tighten test. >> * gcc.target/arm/mve/intrinsics/vcreateq_f32.c: Tighten test. >> * gcc.target/arm/mve/intrinsics/vcreateq_s16.c: Tighten test. >> * gcc.target/arm/mve/intrinsics/vcreateq_s32.c: Tighten test. >> * gcc.target/arm/mve/intrinsics/vcreateq_s64.c: Tighten test. >> * gcc.target/arm/mve/intrinsics/vcreateq_s8.c: Tighten test. >> * gcc.target/arm/mve/intrinsics/vcreateq_u16.c: Tighten test. >> * gcc.target/arm/mve/intrinsics/vcreateq_u32.c: Tighten test. >> * gcc.target/arm/mve/intrinsics/vcreateq_u64.c: Tighten test. >> * gcc.target/arm/mve/intrinsics/vcreateq_u8.c: Tighten test.
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index d913ca24f8ea8d2fcadea972e037ede6f9cf36f9..a3589b19edd7398f66f8dc51276cf94345ec66a5 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -763,7 +763,7 @@ VCREATEQ_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" - "vmov %q0[2], %q0[0], %Q2, %Q1\;vmov %q0[3], %q0[1], %R2, %R1" + "vmov %q0[2], %q0[0], %Q1, %Q2\;vmov %q0[3], %q0[1], %R1, %R2" [(set_attr "type" "mve_move") (set_attr "length""8")]) @@ -778,7 +778,7 @@ VCREATEQ)) ] "TARGET_HAVE_MVE" - "vmov %q0[2], %q0[0], %Q2, %Q1\;vmov %q0[3], %q0[1], %R2, %R1" + "vmov %q0[2], %q0[0], %Q1, %Q2\;vmov %q0[3], %q0[1], %R1, %R2" [(set_attr "type" "mve_move") (set_attr "length""8")]) diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_f16.c index 0458bb1bb7cd6a3f898f3138f86d9c52374ae48d..8d6764d893834bb751ba79476f67ef5111ee1775 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_f16.c @@ -12,8 +12,8 @@ extern "C" { /* **foo: ** ... -** vmov q[0-9+]\[2\], q[0-9+]\[0\], r[0-9+], r[0-9+] -** vmov q[0-9+]\[3\], q[0-9+]\[1\], r[0-9+], r[0-9+] +** vmov q[0-9+]\[2\], q[0-9+]\[0\], r0, r2 +** vmov q[0-9+]\[3\], q[0-9+]\[1\], r1, r3 ** ... */ float16x8_t @@ -39,4 +39,4 @@ foo1 () } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_f32.c index af782b5ac5379f6890af03c3f5ae6ef41492f623..6ab05ced809ec38eb5b72123120a0c822cf3e351 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_f32.c @@ -12,8 +12,8 @@ extern "C" { /* **foo: ** ... -** vmov q[0-9+]\[2\], q[0-9+]\[0\], r[0-9+], r[0-9+] -** vmov q[0-9+]\[3\], q[0-9+]\[1\], r[0-9+], r[0-9+] +** vmov q[0-9+]\[2\], q[0-9+]\[0\], r0, r2 +** vmov q[0-9+]\[3\], q[0-9+]\[1\], r1, r3 ** ... */ float32x4_t @@ -39,4 +39,4 @@ foo1 () } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s16.c index 8a3e91843f8cdece415d685b13710e4d250d8da0..290637595a4a26c019abcb6e85f1741d72ade93f 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s16.c @@ -12,8 +12,8 @@ extern "C" { /* **foo: ** ... -** vmov q[0-9+]\[2\], q[0-9+]\[0\], r[0-9+], r[0-9+] -** vmov q[0-9+]\[3\], q[0-9+]\[1\], r[0-9+], r[0-9+] +** vmov q[0-9+]\[2\], q[0-9+]\[0\], r0, r2 +** vmov q[0-9+]\[3\], q[0-9+]\[1\], r1, r3 ** ... */ int16x8_t @@ -39,4 +39,4 @@ foo1 () } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s32.c index 5e385dfeee0ad67b5c9cf1c638af315d25a4a4d3..4aeead1175eae97ca579bd076fe77600735d47a5 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s32.c @@ -12,8 +12,8 @@ extern "C" { /* **foo: ** ... -** vmov q[0-9+]\[2\], q[0-9+]\[0\], r[0-9+], r[0-9+] -** vmov q[0-9+]\[3\], q[0-9+]\[1\], r[0-9+], r[0-9+] +** vmov q[0-9+]\[2\], q[0-9+]\[0\], r0, r2 +** vmov q[0-9+]\[3\], q[0-9+]\[1\], r1, r3 ** ... */ int32x4_t @@ -39,4 +39,4 @@ foo1 () } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s64.c index df901680c2bb5768525e4451462accaa5baf3e60..9f6df427a8fb51c04ac1a10590e6ca70b31aea4c 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s64.c @@ -12,8 +12,8 @@ extern "C" { /* **foo: ** ... -** vmov q[0-9+]\[2\], q[0-9+]\[0\], r[0-9+], r[0-9+] -** vmov q[0-9+]\[3\], q[0-9+]\[1\], r[0-9+], r[0-9+] +** vmov q[0-9+]\[2\], q[0-9+]\[0\], r0, r2 +** vmov q[0-9+]\[3\], q[0-9+]\[1\], r1, r3 ** ... */ int64x2_t @@ -39,4 +39,4 @@ foo1 () } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s8.c index e853395af0c684ac32d16a76179b65a7deb835ca..196c147fb65010fbcb5ab5bd7f00cf056807b11a 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_s8.c @@ -12,8 +12,8 @@ extern "C" { /* **foo: ** ... -** vmov q[0-9+]\[2\], q[0-9+]\[0\], r[0-9+], r[0-9+] -** vmov q[0-9+]\[3\], q[0-9+]\[1\], r[0-9+], r[0-9+] +** vmov q[0-9+]\[2\], q[0-9+]\[0\], r0, r2 +** vmov q[0-9+]\[3\], q[0-9+]\[1\], r1, r3 ** ... */ int8x16_t @@ -39,4 +39,4 @@ foo1 () } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u16.c index bf4a137bb43fc84e4fe04a0935a6851dd61a0735..20b18e2ac15f2765d4ce20e7363f0b5822a5121a 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u16.c @@ -12,8 +12,8 @@ extern "C" { /* **foo: ** ... -** vmov q[0-9+]\[2\], q[0-9+]\[0\], r[0-9+], r[0-9+] -** vmov q[0-9+]\[3\], q[0-9+]\[1\], r[0-9+], r[0-9+] +** vmov q[0-9+]\[2\], q[0-9+]\[0\], r0, r2 +** vmov q[0-9+]\[3\], q[0-9+]\[1\], r1, r3 ** ... */ uint16x8_t @@ -39,4 +39,4 @@ foo1 () } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u32.c index efb58ddff8bb0fe77bcd2777e87de71aca1bd456..febfd3bd7828c2fa5598407741d9dc27e248772c 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u32.c @@ -12,8 +12,8 @@ extern "C" { /* **foo: ** ... -** vmov q[0-9+]\[2\], q[0-9+]\[0\], r[0-9+], r[0-9+] -** vmov q[0-9+]\[3\], q[0-9+]\[1\], r[0-9+], r[0-9+] +** vmov q[0-9+]\[2\], q[0-9+]\[0\], r0, r2 +** vmov q[0-9+]\[3\], q[0-9+]\[1\], r1, r3 ** ... */ uint32x4_t @@ -39,4 +39,4 @@ foo1 () } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u64.c index 91dd885dce1cbc9b7a73029651160fd3e3733727..5a49b346bf4fcd8bb59d0702190c8dd576b261e2 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u64.c @@ -12,8 +12,8 @@ extern "C" { /* **foo: ** ... -** vmov q[0-9+]\[2\], q[0-9+]\[0\], r[0-9+], r[0-9+] -** vmov q[0-9+]\[3\], q[0-9+]\[1\], r[0-9+], r[0-9+] +** vmov q[0-9+]\[2\], q[0-9+]\[0\], r0, r2 +** vmov q[0-9+]\[3\], q[0-9+]\[1\], r1, r3 ** ... */ uint64x2_t @@ -39,4 +39,4 @@ foo1 () } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u8.c index d5d001158ed8f042cd962564e62ade27984aeb95..c0ac5e512254f6cb8642063a38b2571a18af8663 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcreateq_u8.c @@ -12,8 +12,8 @@ extern "C" { /* **foo: ** ... -** vmov q[0-9+]\[2\], q[0-9+]\[0\], r[0-9+], r[0-9+] -** vmov q[0-9+]\[3\], q[0-9+]\[1\], r[0-9+], r[0-9+] +** vmov q[0-9+]\[2\], q[0-9+]\[0\], r0, r2 +** vmov q[0-9+]\[3\], q[0-9+]\[1\], r1, r3 ** ... */ uint8x16_t @@ -39,4 +39,4 @@ foo1 () } #endif -/* { dg-final { scan-assembler-not "__ARM_undef" } } */ \ No newline at end of file +/* { dg-final { scan-assembler-not "__ARM_undef" } } */