diff mbox series

rs6000: Support [u]mod<mode>3 for vector modulo insns

Message ID 549ff7f8-690e-8710-3c63-76df21006704@linux.ibm.com
State New
Headers show
Series rs6000: Support [u]mod<mode>3 for vector modulo insns | expand

Commit Message

Kewen.Lin July 7, 2021, 9:03 a.m. UTC
Hi,

This patch lets vectorized loops exploit the vector modulo
instructions newly introduced in Power10.  It simply
renames the existing define_insns to the standard pattern
names.

Is it ok for trunk?

BR,
Kewen
-----
gcc/ChangeLog:

	* config/rs6000/rs6000-builtin.def (MODS_V2DI, MODS_V4SI, MODU_V2DI,
	MODU_V4SI): Adjust.
	* config/rs6000/vsx.md (mods_<mode>): Renamed to...
	(mod<mode>3): ... this.
	(modu_<mode>): Renamed to...
	(umod<mode>3): ... this.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/mod-vectorize.c: New test.
---
 gcc/config/rs6000/rs6000-builtin.def          |  8 ++--
 gcc/config/rs6000/vsx.md                      |  4 +-
 .../gcc.target/powerpc/mod-vectorize.c        | 46 +++++++++++++++++++
 3 files changed, 52 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mod-vectorize.c

Comments

Segher Boessenkool July 7, 2021, 5:10 p.m. UTC | #1
Hi!

On Wed, Jul 07, 2021 at 05:03:23PM +0800, Kewen.Lin wrote:
> This patch is to make Power10 newly introduced vector
> modulo instructions exploited in vectorized loops, it
> just simply renames existing define_insns as standard
> pattern names.

> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -----
> gcc/ChangeLog:
> 
> 	* config/rs6000/rs6000-builtin.def (MODS_V2DI, MODS_V4SI, MODU_V2DI,
> 	MODU_V4SI): Adjust.
> 	* config/rs6000/vsx.md (mods_<mode>): Renamed to...
> 	(mod<mode>3): ... this.
> 	(modu_<mode>): Renamed to...
> 	(umod<mode>3): ... this.

("Rename", not "Renamed")

(It reads better if you put the "Adjust" entry after the rest, btw)

So I suppose the new testcase FAILs without these changes?

Okay for trunk.  Thanks!


Segher
Kewen.Lin July 8, 2021, 12:20 a.m. UTC | #2
Hi Segher,

on 2021/7/8 上午1:10, Segher Boessenkool wrote:
> Hi!
> 
> On Wed, Jul 07, 2021 at 05:03:23PM +0800, Kewen.Lin wrote:
>> This patch is to make Power10 newly introduced vector
>> modulo instructions exploited in vectorized loops, it
>> just simply renames existing define_insns as standard
>> pattern names.
> 
>>
>> Is it ok for trunk?
>>
>> BR,
>> Kewen
>> -----
>> gcc/ChangeLog:
>>
>> 	* config/rs6000/rs6000-builtin.def (MODS_V2DI, MODS_V4SI, MODU_V2DI,
>> 	MODU_V4SI): Adjust.
>> 	* config/rs6000/vsx.md (mods_<mode>): Renamed to...
>> 	(mod<mode>3): ... this.
>> 	(modu_<mode>): Renamed to...
>> 	(umod<mode>3): ... this.
> 
> ("Rename", not "Renamed")
> 

Oh, I forgot to use the imperative form; will fix.

> (It reads better if you put the "Adjust" entry after the rest, btw)
> 

Will switch them.  :)

> So I suppose the new testcase FAILs without these changes?
> 

Yes, it fails without these changes.

> Okay for trunk.  Thanks!
> 

Thanks for the reviews!

BR,
Kewen
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index d7ce4de421e..592efe31b04 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -3012,10 +3012,10 @@  BU_P10V_AV_2 (DIVS_V4SI, "vdivsw", CONST, divv4si3)
 BU_P10V_AV_2 (DIVS_V2DI, "vdivsd", CONST, divv2di3)
 BU_P10V_AV_2 (DIVU_V4SI, "vdivuw", CONST, udivv4si3)
 BU_P10V_AV_2 (DIVU_V2DI, "vdivud", CONST, udivv2di3)
-BU_P10V_AV_2 (MODS_V2DI, "vmodsd", CONST, mods_v2di)
-BU_P10V_AV_2 (MODS_V4SI, "vmodsw", CONST, mods_v4si)
-BU_P10V_AV_2 (MODU_V2DI, "vmodud", CONST, modu_v2di)
-BU_P10V_AV_2 (MODU_V4SI, "vmoduw", CONST, modu_v4si)
+BU_P10V_AV_2 (MODS_V2DI, "vmodsd", CONST, modv2di3)
+BU_P10V_AV_2 (MODS_V4SI, "vmodsw", CONST, modv4si3)
+BU_P10V_AV_2 (MODU_V2DI, "vmodud", CONST, umodv2di3)
+BU_P10V_AV_2 (MODU_V4SI, "vmoduw", CONST, umodv4si3)
 BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, mulhs_v2di)
 BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, mulhs_v4si)
 BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, mulhu_v2di)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f2260badf70..f622873d758 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6333,7 +6333,7 @@  (define_insn "udiv<mode>3"
   [(set_attr "type" "vecdiv")
    (set_attr "size" "<bits>")])
 
-(define_insn "mods_<mode>"
+(define_insn "mod<mode>3"
   [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
 	(mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
 		    (match_operand:VIlong 2 "vsx_register_operand" "v")))]
@@ -6342,7 +6342,7 @@  (define_insn "mods_<mode>"
   [(set_attr "type" "vecdiv")
    (set_attr "size" "<bits>")])
 
-(define_insn "modu_<mode>"
+(define_insn "umod<mode>3"
   [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
 	(umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
 		     (match_operand:VIlong 2 "vsx_register_operand" "v")))]
diff --git a/gcc/testsuite/gcc.target/powerpc/mod-vectorize.c b/gcc/testsuite/gcc.target/powerpc/mod-vectorize.c
new file mode 100644
index 00000000000..4d4f5cd6446
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mod-vectorize.c
@@ -0,0 +1,46 @@ 
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test that the vectorizer can exploit the ISA 3.1 Vector Modulo
+   Signed/Unsigned Word/Doubleword instructions for modulo operations.  */
+
+#define N 128
+
+extern signed int si_a[N], si_b[N], si_c[N];
+extern unsigned int ui_a[N], ui_b[N], ui_c[N];
+extern signed long long sd_a[N], sd_b[N], sd_c[N];
+extern unsigned long long ud_a[N], ud_b[N], ud_c[N];
+
+__attribute__ ((noipa)) void
+test_si ()
+{
+  for (int i = 0; i < N; i++)
+    si_c[i] = si_a[i] % si_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_ui ()
+{
+  for (int i = 0; i < N; i++)
+    ui_c[i] = ui_a[i] % ui_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_sd ()
+{
+  for (int i = 0; i < N; i++)
+    sd_c[i] = sd_a[i] % sd_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_ud ()
+{
+  for (int i = 0; i < N; i++)
+    ud_c[i] = ud_a[i] % ud_b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvmodsw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmoduw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmodsd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmodud\M} 1 } } */