diff mbox series

nvptx: Provide vec_set<mode> and vec_extract<vmode><mode> patterns.

Message ID 000d01d65a77$6e813d60$4b83b820$@nextmovesoftware.com
State New
Headers show
Series nvptx: Provide vec_set<mode> and vec_extract<vmode><mode> patterns. | expand

Commit Message

Roger Sayle July 15, 2020, 7:13 a.m. UTC
This patch provides standard vec_extract and vec_set patterns to the
nvptx backend, to extract an element from a PTX vector and set an
element of a PTX vector respectively.  PTX vectors (I hesitate to
call them SIMD vectors) may contain up to four elements, so vector
modes up to size four are supported by this patch even though the
nvptx backend currently only allows V2SI and V2DI, i.e. two out
of the ten possible vector modes.

As an example of the improvement, the following C function:

typedef int __v2si __attribute__((__vector_size__(8)));
int foo (__v2si arg) { return arg[0]+arg[1]; }

previously generated this code using a shift:

                mov.u64 %r25, %ar0;
                ld.v2.u32       %r26, [%r25];
                mov.b64 %r28, %r26;
                shr.s64 %r30, %r28, 32;
                cvt.u32.u32     %r31, %r26.x;
                cvt.u32.u64     %r32, %r30;
                add.u32 %value, %r31, %r32;

but with this patch now generates:

                mov.u64 %r25, %ar0;
                ld.v2.u32       %r26, [%r25];
                mov.u32 %r28, %r26.x;
                mov.u32 %r29, %r26.y;
                add.u32 %value, %r28, %r29;

I've implemented these getters and setters as their own instructions
instead of attempting the much more intrusive patch of changing the
backend's definition of register_operand.  Given the limited utility
of PTX vectors, I'm not convinced that attempting to support them as
operands in every instruction would be worth the effort involved.

This patch has been tested on nvptx-none hosted on x86_64-pc-linux-gnu
with "make" and "make check" with no new regressions.
Ok for mainline?


2020-07-15  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog:
	* config/nvptx/nvptx.md (nvptx_vector_index_operand): New predicate.
	(VECELEM): New mode attribute for a vector's uppercase element mode.
	(Vecelem): New mode attribute for a vector's lowercase element mode.
	(*vec_set<mode>_0, *vec_set<mode>_1, *vec_set<mode>_2,
	*vec_set<mode>_3): New instructions.
	(vec_set<mode>): New expander to generate one of the above insns.
	(vec_extract<mode><Vecelem>): New instruction.


Thanks in advance,
Roger
--
Roger Sayle
NextMove Software
Cambridge, UK

Comments

Tom de Vries July 30, 2020, 8:43 a.m. UTC | #1
On 7/15/20 9:13 AM, Roger Sayle wrote:
> 
> This patch provides standard vec_extract and vec_set patterns to the
> nvptx backend, to extract an element from a PTX vector and set an
> element of a PTX vector respectively.  PTX vectors (I hesitate to
> call them SIMD vectors) may contain up to four elements, so vector
> modes up to size four are supported by this patch even though the
> nvptx backend currently only allows V2SI and V2DI, i.e. two out
> of the ten possible vector modes.
> 
> As an example of the improvement, the following C function:
> 
> typedef int __v2si __attribute__((__vector_size__(8)));
> int foo (__v2si arg) { return arg[0]+arg[1]; }
> 
> previously generated this code using a shift:
> 
>                 mov.u64 %r25, %ar0;
>                 ld.v2.u32       %r26, [%r25];
>                 mov.b64 %r28, %r26;
>                 shr.s64 %r30, %r28, 32;
>                 cvt.u32.u32     %r31, %r26.x;
>                 cvt.u32.u64     %r32, %r30;
>                 add.u32 %value, %r31, %r32;
> 
> but with this patch now generates:
> 
>                 mov.u64 %r25, %ar0;
>                 ld.v2.u32       %r26, [%r25];
>                 mov.u32 %r28, %r26.x;
>                 mov.u32 %r29, %r26.y;
>                 add.u32 %value, %r28, %r29;
> 
> I've implemented these getters and setters as their own instructions
> instead of attempting the much more intrusive patch of changing the
> backend's definition of register_operand.  Given the limited utility
> of PTX vectors, I'm not convinced that attempting to support them as
> operands in every instruction would be worth the effort involved.
> 
> This patch has been tested on nvptx-none hosted on x86_64-pc-linux-gnu
> with "make" and "make check" with no new regressions.
> Ok for mainline?
> 
> 
> 2020-07-15  Roger Sayle  <roger@nextmovesoftware.com>
> 
> gcc/ChangeLog:
> 	* config/nvptx/nvptx.md (nvptx_vector_index_operand): New predicate.
> 	(VECELEM): New mode attribute for a vector's uppercase element mode.
> 	(Vecelem): New mode attribute for a vector's lowercase element mode.
> 	(*vec_set<mode>_0, *vec_set<mode>_1, *vec_set<mode>_2,
> 	*vec_set<mode>_3): New instructions.
> 	(vec_set<mode>): New expander to generate one of the above insns.
> 	(vec_extract<mode><Vecelem>): New instruction.

Added test-case, fixed some nits, pushed (not reposting).

Thanks,
- Tom
diff mbox series

Patch

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 6545b81..b363277 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -118,6 +118,10 @@ 
 (define_predicate "nvptx_float_comparison_operator"
   (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
 
+(define_predicate "nvptx_vector_index_operand"  ; constant vector element index
+  (and (match_code "const_int")  ; must be a compile-time integer constant
+       (match_test "UINTVAL (op) < 4")))  ; value tested as unsigned: only 0..3 pass
+
 ;; Test for a valid operand for a call instruction.
 (define_predicate "call_insn_operand"
   (match_code "symbol_ref,reg")
@@ -194,6 +198,10 @@ 
 ;; pointer-sized quantities.  Exactly one of the two alternatives will match.
 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
 
+;; Element mode of each vector mode, spelled in upper and in lower case.
+(define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])  ; for operand modes, <VECELEM>
+(define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])  ; for pattern names, <Vecelem>
+
 ;; We should get away with not defining memory alternatives, since we don't
 ;; get variables in this mode and pseudos are never spilled.
 (define_insn "movbi"
@@ -1051,6 +1059,79 @@ 
   ""
   "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
 
+;; Vector operations
+
+(define_insn "*vec_set<mode>_0"  ; store scalar operand 1 into the .x component
+  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
+        (vec_merge:VECIM
+	  (vec_duplicate:VECIM
+	    (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
+	  (match_dup 0)  ; second merge input is the destination vector itself
+	  (const_int 1)))]  ; merge mask 1 == 1 << 0, i.e. element index 0
+  ""
+  "%.\\tmov%t1\\t%0.x, %1;")
+
+(define_insn "*vec_set<mode>_1"  ; store scalar operand 1 into the .y component
+  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
+        (vec_merge:VECIM
+	  (vec_duplicate:VECIM
+	    (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
+	  (match_dup 0)  ; second merge input is the destination vector itself
+	  (const_int 2)))]  ; merge mask 2 == 1 << 1, i.e. element index 1
+  ""
+  "%.\\tmov%t1\\t%0.y, %1;")
+
+(define_insn "*vec_set<mode>_2"  ; store scalar operand 1 into the .z component
+  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
+        (vec_merge:VECIM
+	  (vec_duplicate:VECIM
+	    (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
+	  (match_dup 0)  ; second merge input is the destination vector itself
+	  (const_int 4)))]  ; merge mask 4 == 1 << 2, i.e. element index 2
+  ""
+  "%.\\tmov%t1\\t%0.z, %1;")
+
+(define_insn "*vec_set<mode>_3"  ; store scalar operand 1 into the .w component
+  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
+        (vec_merge:VECIM
+	  (vec_duplicate:VECIM
+	    (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
+	  (match_dup 0)  ; second merge input is the destination vector itself
+	  (const_int 8)))]  ; merge mask 8 == 1 << 3, i.e. element index 3
+  ""
+  "%.\\tmov%t1\\t%0.w, %1;")
+
+(define_expand "vec_set<mode>"  ; op0[op2] = op1, emitted as a one-bit vec_merge
+  [(match_operand:VECIM 0 "nvptx_register_operand")
+   (match_operand:<VECELEM> 1 "nvptx_register_operand")
+   (match_operand:SI 2 "nvptx_vector_index_operand")]
+  ""
+{
+  machine_mode vmode = GET_MODE (operands[0]);
+  rtx lane_bit = GEN_INT (1 << INTVAL (operands[2]));
+  rtx splat = gen_rtx_VEC_DUPLICATE (vmode, operands[1]);
+  rtx merged = gen_rtx_VEC_MERGE (vmode, splat, operands[0], lane_bit);
+  emit_insn (gen_rtx_SET (operands[0], merged));
+  DONE;
+})
+
+(define_insn "vec_extract<mode><Vecelem>"
+  [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R")
+	(vec_select:<VECELEM>
+	  (match_operand:VECIM 1 "nvptx_register_operand" "R")
+	  (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))]
+  ""
+{
+  /* Operand 2 is a constant in 0..3; pick the matching component move.  */
+  switch (INTVAL (operands[2]))
+    {
+    case 0: return "%.\\tmov%t0\\t%0, %1.x;";
+    case 1: return "%.\\tmov%t0\\t%0, %1.y;";
+    case 2: return "%.\\tmov%t0\\t%0, %1.z;";
+    default: return "%.\\tmov%t0\\t%0, %1.w;";
+    }
+})
+
 ;; Miscellaneous
 
 (define_insn "nop"