@@ -166,7 +166,6 @@ (define_c_enum "unspec" [
UNSPEC_VEC_PACK_UNSIGNED_SATURATE_CC
UNSPEC_VEC_PACK_UNSIGNED_SATURATE_GENCC
UNSPEC_VEC_PERM
- UNSPEC_VEC_PERMI
UNSPEC_VEC_EXTEND
UNSPEC_VEC_STORE_LEN
UNSPEC_VEC_STORE_LEN_R
@@ -767,6 +767,32 @@ (define_insn "*vec_perm<mode>"
"vperm\t%v0,%v1,%v2,%v3"
[(set_attr "op_type" "VRR")])
+
+; First DW of op1 and second DW of op2
+(define_insn "*vpdi1<mode>"
+ [(set (match_operand:V_HW_2 0 "register_operand" "=v")
+ (vec_select:V_HW_2
+ (vec_concat:<vec_2x_nelts>
+ (match_operand:V_HW_2 1 "register_operand" "v")
+ (match_operand:V_HW_2 2 "register_operand" "v"))
+ (parallel [(const_int 0) (const_int 3)])))]
+ "TARGET_VX"
+ "vpdi\t%v0,%v1,%v2,1"
+ [(set_attr "op_type" "VRR")])
+
+; Second DW of op1 and first DW of op2
+(define_insn "*vpdi4<mode>"
+ [(set (match_operand:V_HW_2 0 "register_operand" "=v")
+ (vec_select:V_HW_2
+ (vec_concat:<vec_2x_nelts>
+ (match_operand:V_HW_2 1 "register_operand" "v")
+ (match_operand:V_HW_2 2 "register_operand" "v"))
+ (parallel [(const_int 1) (const_int 2)])))]
+ "TARGET_VX"
+ "vpdi\t%v0,%v1,%v2,4"
+ [(set_attr "op_type" "VRR")])
+
+
(define_insn "*vmrhb"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(vec_select:V16QI
@@ -403,28 +403,22 @@ (define_insn "vec_zperm<mode>"
"vperm\t%v0,%v1,%v2,%v3"
[(set_attr "op_type" "VRR")])
+; Incoming op3 is in vec_permi format and will be turned into a
+; permute vector consisting of op3 and op4.
(define_expand "vec_permi<mode>"
- [(set (match_operand:V_HW_64 0 "register_operand" "")
- (unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand" "")
- (match_operand:V_HW_64 2 "register_operand" "")
- (match_operand:QI 3 "const_mask_operand" "")]
- UNSPEC_VEC_PERMI))]
+ [(set (match_operand:V_HW_2 0 "register_operand" "")
+ (vec_select:V_HW_2
+ (vec_concat:<vec_2x_nelts>
+ (match_operand:V_HW_2 1 "register_operand" "")
+ (match_operand:V_HW_2 2 "register_operand" ""))
+ (parallel [(match_operand:QI 3 "const_mask_operand" "") (match_dup 4)])))]
"TARGET_VX"
{
HOST_WIDE_INT val = INTVAL (operands[3]);
- operands[3] = GEN_INT ((val & 1) | (val & 2) << 1);
+ operands[3] = GEN_INT ((val & 2) >> 1);
+ operands[4] = GEN_INT ((val & 1) + 2);
})
-(define_insn "*vec_permi<mode>"
- [(set (match_operand:V_HW_64 0 "register_operand" "=v")
- (unspec:V_HW_64 [(match_operand:V_HW_64 1 "register_operand" "v")
- (match_operand:V_HW_64 2 "register_operand" "v")
- (match_operand:QI 3 "const_mask_operand" "C")]
- UNSPEC_VEC_PERMI))]
- "TARGET_VX && (UINTVAL (operands[3]) & 10) == 0"
- "vpdi\t%v0,%v1,%v2,%b3"
- [(set_attr "op_type" "VRR")])
-
; Vector replicate
deleted file mode 100644
@@ -1,54 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O3 -march=z13 -mzarch --save-temps" } */
-/* { dg-do run { target { s390_z13_hw } } } */
-
-/*
- * The vector intrinsic vec_permi(a, b, c) chooses one of the two eight-byte
- * vector elements in each of a and b, depending on the value of c. The valid
- * values for c differ from the encoding for the M4 field in assembly and in the
- * binary instruction.
- *
- * selection | c | encoding in assembly
- * a[0] b[0] | 0 | 0
- * a[0] b[1] | 1 | 1
- * a[1] b[0] | 2 | 4
- * a[1] b[1] | 3 | 5
- *
- * (i.e., indices a[i] b[j] are encoded for c as (i<<1) | j, yet for the
- * M4 field as (i<<2) | j.
- */
-#include <assert.h>
-#include <vecintrin.h>
-
-typedef unsigned long long uv2di __attribute__((vector_size(16)));
-
-__attribute__ ((noipa)) static uv2di
-do_vec_permi(uv2di a, uv2di b, int c)
-{
- switch(c) {
- case 0: return vec_permi(a, b, 0);
- case 1: return vec_permi(a, b, 1);
- case 2: return vec_permi(a, b, 2);
- case 3: return vec_permi(a, b, 3);
- default: assert(0);
- }
-}
-
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,1\n} 1 } } */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,4\n} 1 } } */
-/* { dg-final { scan-assembler-times {\n\tvpdi\t%v\d+,%v\d+,%v\d+,5\n} 1 } } */
-
-int
-main (void)
-{
- uv2di a = { 0xa0, 0xa1 };
- uv2di b = { 0xb0, 0xb1 };
-
- for (int i = 0; i < 2; i++)
- for (int j = 0; j < 2; j++) {
- uv2di res = do_vec_permi(a, b, (i<<1)|j);
- assert(res[0] == a[i]);
- assert(res[1] == b[j]);
- }
-}
new file mode 100644
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13 -mzvector --save-temps" } */
+/* { dg-do run { target { s390_z13_hw } } } */
+
+/*
+ * The vector intrinsic vec_permi(a, b, c) chooses one of the two eight-byte
+ * vector elements in each of a and b, depending on the value of c. The valid
+ * values for c differ from the encoding for the M4 field in assembly and in the
+ * binary instruction.
+ *
+ * selection | c | encoding in assembly
+ * a[0] b[0] | 0 | 0 -> vmrhg
+ * a[0] b[1] | 1 | 1
+ * a[1] b[0] | 2 | 4
+ * a[1] b[1] | 3 | 5 -> vmrlg
+ *
+ * (i.e., indices a[i] b[j] are encoded for c as (i<<1) | j, yet for the
+ * M4 field as (i<<2) | j).
+ */
+
+/* { dg-final { scan-assembler-times "\tvmrhg\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvmrlg\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvpdi\t" 6 } } */
+
+#include "vec-types.h"
+#include <vecintrin.h>
+
+#define GEN_PERMI_BITS(VEC_TYPE, BITS) \
+ VEC_TYPE __attribute__((noinline)) \
+ permi_##BITS##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) { \
+ return vec_permi (a, b, (BITS)); }
+
+#define GEN_PERMI(VEC_TYPE) \
+ GEN_PERMI_BITS(VEC_TYPE, 0); \
+ GEN_PERMI_BITS(VEC_TYPE, 1); \
+ GEN_PERMI_BITS(VEC_TYPE, 2); \
+ GEN_PERMI_BITS(VEC_TYPE, 3);
+
+GEN_PERMI(v2di)
+GEN_PERMI(uv2di)
+GEN_PERMI(v2df)
+
+
+#define CHECK_PERMI_BITS(VEC_TYPE, BITS) \
+ VEC_TYPE r##BITS = permi_##BITS##_##VEC_TYPE (a, b); \
+ if (r##BITS[0] != ((BITS) & 2) >> 1 \
+ || r##BITS[1] != ((BITS) & 1) + 2) \
+ __builtin_abort();
+
+#define CHECK_PERMI(VEC_TYPE) \
+ { \
+ VEC_TYPE a = GEN_SEQ_VEC (VEC_TYPE, 0); \
+ VEC_TYPE b = GEN_SEQ_VEC (VEC_TYPE, 2); \
+ CHECK_PERMI_BITS (VEC_TYPE, 0); \
+ CHECK_PERMI_BITS (VEC_TYPE, 1); \
+ CHECK_PERMI_BITS (VEC_TYPE, 2); \
+ CHECK_PERMI_BITS (VEC_TYPE, 3); \
+ }
+
+int
+main ()
+{
+ CHECK_PERMI (v2di);
+ CHECK_PERMI (uv2di);
+ CHECK_PERMI (v2df);
+}