Message ID | VI1PR08MB53257C3EDE74997F50F461BAFF599@VI1PR08MB5325.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | None | expand |
Forgot to include the list > -----Original Message----- > From: Tamar Christina > Sent: Tuesday, May 25, 2021 3:57 PM > To: Tamar Christina <Tamar.Christina@arm.com> > Cc: Richard Earnshaw <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; > Ramana Radhakrishnan <Ramana.Radhakrishnan@arm.com>; Kyrylo Tkachov > <Kyrylo.Tkachov@arm.com> > Subject: RE: [PATCH 3/4][AArch32]: Add support for sign differing dot- > product usdot for NEON. > > Hi All, > > This is a respin based on the feedback gotten from the AArch64 review. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * config/arm/neon.md (usdot_prod<vsi2qi>): New. > > gcc/testsuite/ChangeLog: > > * gcc.target/arm/simd/vusdot-autovec.c: New test. > > > -----Original Message----- > > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of > Tamar > > Christina via Gcc-patches > > Sent: Wednesday, May 5, 2021 6:42 PM > > To: gcc Patches <gcc-patches@gcc.gnu.org> > > Cc: Richard Earnshaw <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; > > Ramana Radhakrishnan <Ramana.Radhakrishnan@arm.com> > > Subject: FW: [PATCH 3/4][AArch32]: Add support for sign differing dot- > > product usdot for NEON. > > > > Forgot to CC maintainers.. > > > > -----Original Message----- > > From: Tamar Christina <tamar.christina@arm.com> > > Sent: Wednesday, May 5, 2021 6:39 PM > > To: gcc-patches@gcc.gnu.org > > Cc: nd <nd@arm.com> > > Subject: [PATCH 3/4][AArch32]: Add support for sign differing > > dot-product usdot for NEON. > > > > Hi All, > > > > This adds optabs implementing usdot_prod. 
> > > > The following testcase: > > > > #define N 480 > > #define SIGNEDNESS_1 unsigned > > #define SIGNEDNESS_2 signed > > #define SIGNEDNESS_3 signed > > #define SIGNEDNESS_4 unsigned > > > > SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, > > SIGNEDNESS_3 char *restrict a, > > SIGNEDNESS_4 char *restrict b) > > { > > for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > { > > int av = a[i]; > > int bv = b[i]; > > SIGNEDNESS_2 short mult = av * bv; > > res += mult; > > } > > return res; > > } > > > > Generates > > > > f: > > vmov.i32 q8, #0 @ v4si > > add r3, r2, #480 > > .L2: > > vld1.8 {q10}, [r2]! > > vld1.8 {q9}, [r1]! > > vusdot.s8 q8, q9, q10 > > cmp r3, r2 > > bne .L2 > > vadd.i32 d16, d16, d17 > > vpadd.i32 d16, d16, d16 > > vmov.32 r3, d16[0] > > add r0, r0, r3 > > bx lr > > > > instead of > > > > f: > > vmov.i32 q8, #0 @ v4si > > add r3, r2, #480 > > .L2: > > vld1.8 {q9}, [r2]! > > vld1.8 {q11}, [r1]! > > cmp r3, r2 > > vmull.s8 q10, d18, d22 > > vmull.s8 q9, d19, d23 > > vaddw.s16 q8, q8, d20 > > vaddw.s16 q8, q8, d21 > > vaddw.s16 q8, q8, d18 > > vaddw.s16 q8, q8, d19 > > bne .L2 > > vadd.i32 d16, d16, d17 > > vpadd.i32 d16, d16, d16 > > vmov.32 r3, d16[0] > > add r0, r0, r3 > > bx lr > > > > For NEON. I couldn't figure out if the MVE instruction vmlaldav.s16 > > could be used to emulate this. Because it would require additional > > widening to work I left MVE out of this patch set but perhaps someone > should take a look. > > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > > > Ok for master? > > > > Thanks, > > Tamar > > > > gcc/ChangeLog: > > > > * config/arm/neon.md (usdot_prod<vsi2qi>): New. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/arm/simd/vusdot-autovec.c: New test. 
> > > > --- inline copy of patch -- > > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index > > > fec2cc91d24b6eff7b6fc8fdd54f39b3d646c468..23ad411178db77c5d19bee7452 > > bc1070331c1aa0 100644 > > --- a/gcc/config/arm/neon.md > > +++ b/gcc/config/arm/neon.md > > @@ -3075,6 +3075,24 @@ (define_expand "<sup>dot_prod<vsi2qi>" > > DONE; > > }) > > > > +;; Auto-vectorizer pattern for usdot > > +(define_expand "usdot_prod<vsi2qi>" > > + [(set (match_operand:VCVTI 0 "register_operand") > > + (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1 > > + "register_operand") > > + (match_operand:<VSI2QI> 2 > > + "register_operand")] > > + UNSPEC_DOT_US) > > + (match_operand:VCVTI 3 "register_operand")))] > > + "TARGET_I8MM" > > +{ > > + emit_insn ( > > + gen_neon_usdot<vsi2qi> (operands[3], operands[3], operands[1], > > + operands[2])); > > + emit_insn (gen_rtx_SET (operands[0], operands[3])); > > + DONE; > > +}) > > + > > (define_expand "neon_copysignf<mode>" > > [(match_operand:VCVTF 0 "register_operand") > > (match_operand:VCVTF 1 "register_operand") diff --git > > a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c > > b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c > > new file mode 100644 > > index > > > 0000000000000000000000000000000000000000..7cc56f68817d77d6950df0ab37 > > 2d6fbaad6b3813 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c > > @@ -0,0 +1,38 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ > > + > > +#define N 480 > > +#define SIGNEDNESS_1 unsigned > > +#define SIGNEDNESS_2 signed > > +#define SIGNEDNESS_3 signed > > +#define SIGNEDNESS_4 unsigned > > + > > +SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, > > +SIGNEDNESS_3 char *restrict a, > > + SIGNEDNESS_4 char *restrict b) > > +{ > > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > + { > > + int av = a[i]; > > + int bv = b[i]; > > + SIGNEDNESS_2 short mult = av * bv; > > + res += mult; > > + } > > + 
return res; > > +} > > + > > +SIGNEDNESS_1 int __attribute__ ((noipa)) g (SIGNEDNESS_1 int res, > > +SIGNEDNESS_3 char *restrict b, > > + SIGNEDNESS_4 char *restrict a) > > +{ > > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > + { > > + int av = a[i]; > > + int bv = b[i]; > > + SIGNEDNESS_2 short mult = av * bv; > > + res += mult; > > + } > > + return res; > > +} > > + > > +/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target { > > +arm-*-*-gnueabihf } } } } */ > > > > > > --
> -----Original Message----- > From: Tamar Christina <Tamar.Christina@arm.com> > Sent: 25 May 2021 16:02 > To: gcc-patches@gcc.gnu.org > Cc: Richard Earnshaw <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; > Ramana Radhakrishnan <Ramana.Radhakrishnan@arm.com>; Kyrylo > Tkachov <Kyrylo.Tkachov@arm.com> > Subject: RE: [PATCH 3/4][AArch32]: Add support for sign differing dot- > product usdot for NEON. > > Forgot to include the list > > > -----Original Message----- > > From: Tamar Christina > > Sent: Tuesday, May 25, 2021 3:57 PM > > To: Tamar Christina <Tamar.Christina@arm.com> > > Cc: Richard Earnshaw <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; > > Ramana Radhakrishnan <Ramana.Radhakrishnan@arm.com>; Kyrylo > Tkachov > > <Kyrylo.Tkachov@arm.com> > > Subject: RE: [PATCH 3/4][AArch32]: Add support for sign differing dot- > > product usdot for NEON. > > > > Hi All, > > > > This is a respin based on the feedback gotten from the AArch64 review. > > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > > > Ok for master? > >
Ok. Thanks, Kyrill
> > Thanks, > > Tamar > > > > gcc/ChangeLog: > > > > * config/arm/neon.md (usdot_prod<vsi2qi>): New. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/arm/simd/vusdot-autovec.c: New test. > > > > > -----Original Message----- > > > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of > > Tamar > > > Christina via Gcc-patches > > > Sent: Wednesday, May 5, 2021 6:42 PM > > > To: gcc Patches <gcc-patches@gcc.gnu.org> > > > Cc: Richard Earnshaw <Richard.Earnshaw@arm.com>; nd > <nd@arm.com>; > > > Ramana Radhakrishnan <Ramana.Radhakrishnan@arm.com> > > > Subject: FW: [PATCH 3/4][AArch32]: Add support for sign differing dot- > > > product usdot for NEON. > > > > > > Forgot to CC maintainers..
> > > > > > -----Original Message----- > > > From: Tamar Christina <tamar.christina@arm.com> > > > Sent: Wednesday, May 5, 2021 6:39 PM > > > To: gcc-patches@gcc.gnu.org > > > Cc: nd <nd@arm.com> > > > Subject: [PATCH 3/4][AArch32]: Add support for sign differing > > > dot-product usdot for NEON. > > > > > > Hi All, > > > > > > This adds optabs implementing usdot_prod. > > > > > > The following testcase: > > > > > > #define N 480 > > > #define SIGNEDNESS_1 unsigned > > > #define SIGNEDNESS_2 signed > > > #define SIGNEDNESS_3 signed > > > #define SIGNEDNESS_4 unsigned > > > > > > SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, > > > SIGNEDNESS_3 char *restrict a, > > > SIGNEDNESS_4 char *restrict b) > > > { > > > for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > > { > > > int av = a[i]; > > > int bv = b[i]; > > > SIGNEDNESS_2 short mult = av * bv; > > > res += mult; > > > } > > > return res; > > > } > > > > > > Generates > > > > > > f: > > > vmov.i32 q8, #0 @ v4si > > > add r3, r2, #480 > > > .L2: > > > vld1.8 {q10}, [r2]! > > > vld1.8 {q9}, [r1]! > > > vusdot.s8 q8, q9, q10 > > > cmp r3, r2 > > > bne .L2 > > > vadd.i32 d16, d16, d17 > > > vpadd.i32 d16, d16, d16 > > > vmov.32 r3, d16[0] > > > add r0, r0, r3 > > > bx lr > > > > > > instead of > > > > > > f: > > > vmov.i32 q8, #0 @ v4si > > > add r3, r2, #480 > > > .L2: > > > vld1.8 {q9}, [r2]! > > > vld1.8 {q11}, [r1]! > > > cmp r3, r2 > > > vmull.s8 q10, d18, d22 > > > vmull.s8 q9, d19, d23 > > > vaddw.s16 q8, q8, d20 > > > vaddw.s16 q8, q8, d21 > > > vaddw.s16 q8, q8, d18 > > > vaddw.s16 q8, q8, d19 > > > bne .L2 > > > vadd.i32 d16, d16, d17 > > > vpadd.i32 d16, d16, d16 > > > vmov.32 r3, d16[0] > > > add r0, r0, r3 > > > bx lr > > > > > > For NEON. I couldn't figure out if the MVE instruction vmlaldav.s16 > > > could be used to emulate this. Because it would require additional > > > widening to work I left MVE out of this patch set but perhaps someone > > should take a look. 
> > > > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > > > > > Ok for master? > > > > > > Thanks, > > > Tamar > > > > > > gcc/ChangeLog: > > > > > > * config/arm/neon.md (usdot_prod<vsi2qi>): New. > > > > > > gcc/testsuite/ChangeLog: > > > > > > * gcc.target/arm/simd/vusdot-autovec.c: New test. > > > > > > --- inline copy of patch -- > > > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index > > > > > > fec2cc91d24b6eff7b6fc8fdd54f39b3d646c468..23ad411178db77c5d19bee74 > 52 > > > bc1070331c1aa0 100644 > > > --- a/gcc/config/arm/neon.md > > > +++ b/gcc/config/arm/neon.md > > > @@ -3075,6 +3075,24 @@ (define_expand "<sup>dot_prod<vsi2qi>" > > > DONE; > > > }) > > > > > > +;; Auto-vectorizer pattern for usdot > > > +(define_expand "usdot_prod<vsi2qi>" > > > + [(set (match_operand:VCVTI 0 "register_operand") > > > + (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1 > > > + "register_operand") > > > + (match_operand:<VSI2QI> 2 > > > + "register_operand")] > > > + UNSPEC_DOT_US) > > > + (match_operand:VCVTI 3 "register_operand")))] > > > + "TARGET_I8MM" > > > +{ > > > + emit_insn ( > > > + gen_neon_usdot<vsi2qi> (operands[3], operands[3], operands[1], > > > + operands[2])); > > > + emit_insn (gen_rtx_SET (operands[0], operands[3])); > > > + DONE; > > > +}) > > > + > > > (define_expand "neon_copysignf<mode>" > > > [(match_operand:VCVTF 0 "register_operand") > > > (match_operand:VCVTF 1 "register_operand") diff --git > > > a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c > > > b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c > > > new file mode 100644 > > > index > > > > > > 0000000000000000000000000000000000000000..7cc56f68817d77d6950df0 > ab37 > > > 2d6fbaad6b3813 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c > > > @@ -0,0 +1,38 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ > > > + > > > +#define N 480 > > > +#define SIGNEDNESS_1 unsigned > > 
> +#define SIGNEDNESS_2 signed > > > +#define SIGNEDNESS_3 signed > > > +#define SIGNEDNESS_4 unsigned > > > + > > > +SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, > > > +SIGNEDNESS_3 char *restrict a, > > > + SIGNEDNESS_4 char *restrict b) > > > +{ > > > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > > + { > > > + int av = a[i]; > > > + int bv = b[i]; > > > + SIGNEDNESS_2 short mult = av * bv; > > > + res += mult; > > > + } > > > + return res; > > > +} > > > + > > > +SIGNEDNESS_1 int __attribute__ ((noipa)) g (SIGNEDNESS_1 int res, > > > +SIGNEDNESS_3 char *restrict b, > > > + SIGNEDNESS_4 char *restrict a) > > > +{ > > > + for (__INTPTR_TYPE__ i = 0; i < N; ++i) > > > + { > > > + int av = a[i]; > > > + int bv = b[i]; > > > + SIGNEDNESS_2 short mult = av * bv; > > > + res += mult; > > > + } > > > + return res; > > > +} > > > + > > > +/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target { > > > +arm-*-*-gnueabihf } } } } */ > > > > > > > > > --
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index fec2cc91d24b6eff7b6fc8fdd54f39b3d646c468..23ad411178db77c5d19bee7452bc1070331c1aa0 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -3075,6 +3075,24 @@ (define_expand "<sup>dot_prod<vsi2qi>" DONE; }) +;; Auto-vectorizer pattern for usdot +(define_expand "usdot_prod<vsi2qi>" + [(set (match_operand:VCVTI 0 "register_operand") + (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1 + "register_operand") + (match_operand:<VSI2QI> 2 + "register_operand")] + UNSPEC_DOT_US) + (match_operand:VCVTI 3 "register_operand")))] + "TARGET_I8MM" +{ + emit_insn ( + gen_neon_usdot<vsi2qi> (operands[3], operands[3], operands[1], + operands[2])); + emit_insn (gen_rtx_SET (operands[0], operands[3])); + DONE; +}) + (define_expand "neon_copysignf<mode>" [(match_operand:VCVTF 0 "register_operand") (match_operand:VCVTF 1 "register_operand") diff --git a/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c new file mode 100644 index 0000000000000000000000000000000000000000..7cc56f68817d77d6950df0ab372d6fbaad6b3813 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vusdot-autovec.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a+i8mm" } */ + +#define N 480 +#define SIGNEDNESS_1 unsigned +#define SIGNEDNESS_2 signed +#define SIGNEDNESS_3 signed +#define SIGNEDNESS_4 unsigned + +SIGNEDNESS_1 int __attribute__ ((noipa)) +f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, + SIGNEDNESS_4 char *restrict b) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; + } + return res; +} + +SIGNEDNESS_1 int __attribute__ ((noipa)) +g (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict b, + SIGNEDNESS_4 char *restrict a) +{ + for (__INTPTR_TYPE__ i = 0; i < N; ++i) + { + int av = a[i]; + int bv = b[i]; + SIGNEDNESS_2 short mult = av * bv; + res += mult; 
+ } + return res; +} + +/* { dg-final { scan-assembler-times {vusdot.s8} 2 { target { arm-*-*-gnueabihf } } } } */
Forgot to CC the maintainers. -----Original Message----- From: Tamar Christina <tamar.christina@arm.com> Sent: Wednesday, May 5, 2021 6:39 PM To: gcc-patches@gcc.gnu.org Cc: nd <nd@arm.com> Subject: [PATCH 3/4][AArch32]: Add support for sign differing dot-product usdot for NEON. Hi All, This adds optabs implementing usdot_prod. The following testcase: #define N 480 #define SIGNEDNESS_1 unsigned #define SIGNEDNESS_2 signed #define SIGNEDNESS_3 signed #define SIGNEDNESS_4 unsigned SIGNEDNESS_1 int __attribute__ ((noipa)) f (SIGNEDNESS_1 int res, SIGNEDNESS_3 char *restrict a, SIGNEDNESS_4 char *restrict b) { for (__INTPTR_TYPE__ i = 0; i < N; ++i) { int av = a[i]; int bv = b[i]; SIGNEDNESS_2 short mult = av * bv; res += mult; } return res; } Generates f: vmov.i32 q8, #0 @ v4si add r3, r2, #480 .L2: vld1.8 {q10}, [r2]! vld1.8 {q9}, [r1]! vusdot.s8 q8, q9, q10 cmp r3, r2 bne .L2 vadd.i32 d16, d16, d17 vpadd.i32 d16, d16, d16 vmov.32 r3, d16[0] add r0, r0, r3 bx lr instead of f: vmov.i32 q8, #0 @ v4si add r3, r2, #480 .L2: vld1.8 {q9}, [r2]! vld1.8 {q11}, [r1]! cmp r3, r2 vmull.s8 q10, d18, d22 vmull.s8 q9, d19, d23 vaddw.s16 q8, q8, d20 vaddw.s16 q8, q8, d21 vaddw.s16 q8, q8, d18 vaddw.s16 q8, q8, d19 bne .L2 vadd.i32 d16, d16, d17 vpadd.i32 d16, d16, d16 vmov.32 r3, d16[0] add r0, r0, r3 bx lr For NEON. I couldn't figure out if the MVE instruction vmlaldav.s16 could be used to emulate this. Because it would require additional widening to work, I left MVE out of this patch set but perhaps someone should take a look. Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/arm/neon.md (usdot_prod<vsi2qi>): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vusdot-autovec.c: New test. --- inline copy of patch -- diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index fec2cc91d24b6eff7b6fc8fdd54f39b3d646c468..23ad411178db77c5d19bee7452bc1070331c1aa0 100644 +arm-*-*-gnueabihf } } } } */ --