diff mbox series

[2/5] IBM Z: Get rid of vpdi unspec

Message ID 20210729073730.23208-3-krebbel@linux.ibm.com
State New
Headers show
Series IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST | expand

Commit Message

Andreas Krebbel July 29, 2021, 7:37 a.m. UTC
The patch gets rid of the unspec used for the vector permute double
immediate instruction and replaces it with generic rtx.

gcc/ChangeLog:

	* config/s390/s390.md (UNSPEC_VEC_PERMI): Remove constant
	definition.
	* config/s390/vector.md (*vpdi1<mode>, *vpdi4<mode>): New pattern
	definitions.
	* config/s390/vx-builtins.md (*vec_permi<mode>): Emit generic rtx
	instead of an unspec.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/zvector/vec-permi.c: Removed.
	* gcc.target/s390/zvector/vec_permi.c: New test.
---
 gcc/config/s390/s390.md                       |  1 -
 gcc/config/s390/vector.md                     | 26 ++++++++
 gcc/config/s390/vx-builtins.md                | 26 +++-----
 .../gcc.target/s390/zvector/vec-permi.c       | 54 ---------------
 .../gcc.target/s390/zvector/vec_permi.c       | 66 +++++++++++++++++++
 5 files changed, 102 insertions(+), 71 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-permi.c
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec_permi.c
diff mbox series

Patch

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index d896faee0fb..1b894a926ce 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -166,7 +166,6 @@  (define_c_enum "unspec" [
    UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC
    UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC
    UNSPEC_VEC_PERM
-   UNSPEC_VEC_PERMI
    UNSPEC_VEC_EXTEND
    UNSPEC_VEC_STORE_LEN
    UNSPEC_VEC_STORE_LEN_R
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 7507aec1c8e..6a6370b5275 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -767,6 +767,32 @@  (define_insn "*vec_perm<mode>"
   "vperm\t%v0,%v1,%v2,%v3"
   [(set_attr "op_type" "VRR")])
 
+
+; First DW of op1 and second DW of op2
+(define_insn "*vpdi1<mode>"
+  [(set (match_operand:V_HW_2   0 "register_operand" "=v")
+	(vec_select:V_HW_2
+	 (vec_concat:<vec_2x_nelts>
+	  (match_operand:V_HW_2 1 "register_operand"  "v")
+	  (match_operand:V_HW_2 2 "register_operand"  "v"))
+	 (parallel [(const_int 0) (const_int 3)])))]
+  "TARGET_VX"
+  "vpdi\t%v0,%v1,%v2,1"
+  [(set_attr "op_type" "VRR")])
+
+; Second DW of op1 and first of op2
+(define_insn "*vpdi4<mode>"
+  [(set (match_operand:V_HW_2   0 "register_operand" "=v")
+	(vec_select:V_HW_2
+	 (vec_concat:<vec_2x_nelts>
+	  (match_operand:V_HW_2 1 "register_operand"  "v")
+	  (match_operand:V_HW_2 2 "register_operand"  "v"))
+	 (parallel [(const_int 1) (const_int 2)])))]
+  "TARGET_VX"
+  "vpdi\t%v0,%v1,%v2,4"
+  [(set_attr "op_type" "VRR")])
+
+
 (define_insn "*vmrhb"
   [(set (match_operand:V16QI                     0 "register_operand" "=v")
         (vec_select:V16QI
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 5abe43b9e53..3799e833187 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -403,28 +403,22 @@  (define_insn "vec_zperm<mode>"
   "vperm\t%v0,%v1,%v2,%v3"
   [(set_attr "op_type" "VRR")])
 
+; Incoming op3 is in vec_permi format and will be turned into a
+; permute vector consisting of op3 and op4.
 (define_expand "vec_permi<mode>"
-  [(set (match_operand:V_HW_64                  0 "register_operand"   "")
-	(unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand"   "")
-			 (match_operand:V_HW_64 2 "register_operand"   "")
-			 (match_operand:QI      3 "const_mask_operand" "")]
-			UNSPEC_VEC_PERMI))]
+  [(set (match_operand:V_HW_2   0 "register_operand" "")
+	(vec_select:V_HW_2
+	 (vec_concat:<vec_2x_nelts>
+	  (match_operand:V_HW_2 1 "register_operand" "")
+	  (match_operand:V_HW_2 2 "register_operand" ""))
+	 (parallel [(match_operand:QI 3 "const_mask_operand" "") (match_dup 4)])))]
   "TARGET_VX"
 {
   HOST_WIDE_INT val = INTVAL (operands[3]);
-  operands[3] = GEN_INT ((val & 1) | (val & 2) << 1);
+  operands[3] = GEN_INT ((val & 2) >> 1);
+  operands[4] = GEN_INT ((val & 1) + 2);
 })
 
-(define_insn "*vec_permi<mode>"
-  [(set (match_operand:V_HW_64                  0 "register_operand"  "=v")
-	(unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand"   "v")
-			 (match_operand:V_HW_64 2 "register_operand"   "v")
-			 (match_operand:QI      3 "const_mask_operand" "C")]
-			UNSPEC_VEC_PERMI))]
-  "TARGET_VX && (UINTVAL (operands[3]) & 10) == 0"
-  "vpdi\t%v0,%v1,%v2,%b3"
-  [(set_attr "op_type" "VRR")])
-
 
 ; Vector replicate
 
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c b/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c
deleted file mode 100644
index c0a852b9703..00000000000
--- a/gcc/testsuite/gcc.target/s390/zvector/vec-permi.c
+++ /dev/null
@@ -1,54 +0,0 @@ 
-/* { dg-do compile } */
-/* { dg-options "-O3 -march=z13 -mzarch --save-temps" } */
-/* { dg-do run { target { s390_z13_hw } } } */
-
-/*
- * The vector intrinsic vec_permi(a, b, c) chooses one of the two eight-byte
- * vector elements in each of a and b, depending on the value of c. The valid
- * values for c differ from the encoding for the M4 field in assembly and in the
- * binary instruction.
- *
- * selection | c | encoding in assembly
- * a[0] b[0] | 0 | 0
- * a[0] b[1] | 1 | 1
- * a[1] b[0] | 2 | 4
- * a[1] b[1] | 3 | 5
- *
- * (i.e., indices a[i] b[j] are encoded for c as (i<<1) | j, yet for the
- * M4 field as (i<<2) | j.
- */
-#include <assert.h>
-#include <vecintrin.h>
-
-typedef unsigned long long uv2di __attribute__((vector_size(16)));
-
-__attribute__ ((noipa)) static uv2di
-do_vec_permi(uv2di a, uv2di b, int c)
-{
-    switch(c) {
-	case 0: return vec_permi(a, b, 0);
-	case 1: return vec_permi(a, b, 1);
-	case 2: return vec_permi(a, b, 2);
-	case 3: return vec_permi(a, b, 3);
-	default: assert(0);
-    }
-}
-
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,1\n} 1 } } */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,4\n} 1 } } */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,5\n} 1 } } */
-
-int
-main (void)
-{
-    uv2di a = { 0xa0, 0xa1 };
-    uv2di b = { 0xb0, 0xb1 };
-
-    for (int i = 0; i < 2; i++)
-	for (int j = 0; j < 2; j++) {
-	    uv2di res = do_vec_permi(a, b, (i<<1)|j);
-	    assert(res[0] == a[i]);
-	    assert(res[1] == b[j]);
-	}
-}
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec_permi.c b/gcc/testsuite/gcc.target/s390/zvector/vec_permi.c
new file mode 100644
index 00000000000..b66fa905dd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec_permi.c
@@ -0,0 +1,66 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13 -mzvector --save-temps" } */
+/* { dg-do run { target { s390_z13_hw } } } */
+
+/*
+ * The vector intrinsic vec_permi(a, b, c) chooses one of the two eight-byte
+ * vector elements in each of a and b, depending on the value of c. The valid
+ * values for c differ from the encoding for the M4 field in assembly and in the
+ * binary instruction.
+ *
+ * selection | c | encoding in assembly
+ * a[0] b[0] | 0 | 0          -> vmrhg
+ * a[0] b[1] | 1 | 1
+ * a[1] b[0] | 2 | 4
+ * a[1] b[1] | 3 | 5          -> vmrlg
+ *
+ * (i.e., indices a[i] b[j] are encoded for c as (i<<1) | j, yet for the
+ * M4 field as (i<<2) | j).
+ */
+
+/* { dg-final { scan-assembler-times "\tvmrhg\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvmrlg\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvpdi\t" 6 } } */
+
+#include "vec-types.h"
+#include <vecintrin.h>
+
+#define GEN_PERMI_BITS(VEC_TYPE, BITS)			\
+  VEC_TYPE __attribute__((noinline))			\
+  permi_##BITS##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) {	\
+    return vec_permi (a, b, (BITS)); }
+
+#define GEN_PERMI(VEC_TYPE)			\
+  GEN_PERMI_BITS(VEC_TYPE, 0);			\
+  GEN_PERMI_BITS(VEC_TYPE, 1);			\
+  GEN_PERMI_BITS(VEC_TYPE, 2);			\
+  GEN_PERMI_BITS(VEC_TYPE, 3);
+
+GEN_PERMI(v2di)
+GEN_PERMI(uv2di)
+GEN_PERMI(v2df)
+
+
+#define CHECK_PERMI_BITS(VEC_TYPE, BITS)		\
+  VEC_TYPE r##BITS = permi_##BITS##_##VEC_TYPE (a, b);	\
+  if (r##BITS[0] != ((BITS) & 2) >> 1			\
+      || r##BITS[1] != ((BITS) & 1) + 2)		\
+    __builtin_abort();
+
+#define CHECK_PERMI(VEC_TYPE)			\
+  {						\
+    VEC_TYPE a = GEN_SEQ_VEC (VEC_TYPE, 0);	\
+    VEC_TYPE b = GEN_SEQ_VEC (VEC_TYPE, 2);	\
+    CHECK_PERMI_BITS (VEC_TYPE, 0);		\
+    CHECK_PERMI_BITS (VEC_TYPE, 1);		\
+    CHECK_PERMI_BITS (VEC_TYPE, 2);		\
+    CHECK_PERMI_BITS (VEC_TYPE, 3);		\
+  }
+
+int
+main ()
+{
+  CHECK_PERMI (v2di);
+  CHECK_PERMI (uv2di);
+  CHECK_PERMI (v2df);
+}