@@ -142,6 +142,7 @@
#define vec_madd __builtin_vec_madd
#define vec_madds __builtin_vec_madds
#define vec_mtvscr __builtin_vec_mtvscr
+#define vec_reve __builtin_vec_vreve
#define vec_vmaxfp __builtin_vec_vmaxfp
#define vec_vmaxsw __builtin_vec_vmaxsw
#define vec_vmaxsh __builtin_vec_vmaxsh
@@ -46,6 +46,7 @@
UNSPEC_VPACK_UNS_UNS_SAT
UNSPEC_VPACK_UNS_UNS_MOD
UNSPEC_VPACK_UNS_UNS_MOD_DIRECT
+ UNSPEC_VREVEV
UNSPEC_VSLV4SI
UNSPEC_VSLO
UNSPEC_VSR
@@ -231,6 +232,11 @@
;; Vector negate
(define_mode_iterator VNEG [V4SI V2DI])
+;; Vector reverse elements (altivec_vreve<mode>2, iterating over VEC_A):
+;; size in bytes of one vector element for each mode listed here.
+(define_mode_attr VEC_A_size [(V2DI "8") (V4SI "4") (V8HI "2")
+ (V16QI "1") (V2DF "8") (V4SF "4")])
+
;; Vector move instructions.
(define_insn "*altivec_mov<mode>"
[(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,?Y,?*r,?*r,v,v,?*r")
@@ -3727,6 +3733,31 @@
DONE;
}")
+;; Vector reverse elements, expanded as a vperm with a constant selector.
+(define_expand "altivec_vreve<mode>2"
+  [(set (match_operand:VEC_A 0 "register_operand" "=v")
+        (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")]
+                      UNSPEC_VREVEV))]
+  "TARGET_ALTIVEC"
+{
+  int size = <VEC_A_size>;
+  int num_elements = 16 / size;
+  rtvec v = rtvec_alloc (16);
+  rtx mask = gen_reg_rtx (V16QImode);
+
+  /* Selector byte I of element J names byte I of source element
+     NUM_ELEMENTS - 1 - J: elements reverse, bytes within them do not.  */
+  for (int j = 0; j < num_elements; j++)
+    for (int i = 0; i < size; i++)
+      RTVEC_ELT (v, i + j * size)
+        = GEN_INT (i + (num_elements - 1 - j) * size);
+
+  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
+                                       operands[1], mask));
+  DONE;
+})
+
;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL,
;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell.
(define_insn "altivec_lvlx"
@@ -1130,6 +1130,13 @@ BU_ALTIVEC_1 (VUPKLSB, "vupklsb", CONST, altivec_vupklsb)
BU_ALTIVEC_1 (VUPKLPX, "vupklpx", CONST, altivec_vupklpx)
BU_ALTIVEC_1 (VUPKLSH, "vupklsh", CONST, altivec_vupklsh)
+BU_ALTIVEC_1 (VREVE_V2DI, "vreve_v2di", CONST, altivec_vrevev2di2)
+BU_ALTIVEC_1 (VREVE_V4SI, "vreve_v4si", CONST, altivec_vrevev4si2)
+BU_ALTIVEC_1 (VREVE_V8HI, "vreve_v8hi", CONST, altivec_vrevev8hi2)
+BU_ALTIVEC_1 (VREVE_V16QI, "vreve_v16qi", CONST, altivec_vrevev16qi2)
+BU_ALTIVEC_1 (VREVE_V2DF, "vreve_v2df", CONST, altivec_vrevev2df2)
+BU_ALTIVEC_1 (VREVE_V4SF, "vreve_v4sf", CONST, altivec_vrevev4sf2)
+
BU_ALTIVEC_1 (FLOAT_V4SI_V4SF, "float_sisf", FP, floatv4siv4sf2)
BU_ALTIVEC_1 (UNSFLOAT_V4SI_V4SF, "uns_float_sisf", FP, floatunsv4siv4sf2)
BU_ALTIVEC_1 (FIX_V4SF_V4SI, "fix_sfsi", FP, fix_truncv4sfv4si2)
@@ -1414,6 +1421,8 @@ BU_ALTIVEC_OVERLOAD_1 (VUPKLPX, "vupklpx")
BU_ALTIVEC_OVERLOAD_1 (VUPKLSB, "vupklsb")
BU_ALTIVEC_OVERLOAD_1 (VUPKLSH, "vupklsh")
+BU_ALTIVEC_OVERLOAD_1 (VREVE, "vreve")
+
/* Overloaded altivec predicates. */
BU_ALTIVEC_OVERLOAD_P (VCMPEQ_P, "vcmpeq_p")
BU_ALTIVEC_OVERLOAD_P (VCMPGT_P, "vcmpgt_p")
@@ -5521,6 +5521,35 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI,
RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V2DI,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V4SI,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V8HI,
+ RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V16QI,
+ RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
/* Crypto builtins. */
{ CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
@@ -16558,6 +16558,21 @@ vector bool char vec_perm (vector bool char,
vector float vec_re (vector float);
+vector bool char vec_reve (vector bool char);
+vector signed char vec_reve (vector signed char);
+vector unsigned char vec_reve (vector unsigned char);
+vector double vec_reve (vector double);
+vector float vec_reve (vector float);
+vector bool int vec_reve (vector bool int);
+vector signed int vec_reve (vector signed int);
+vector unsigned int vec_reve (vector unsigned int);
+vector bool long long vec_reve (vector bool long long);
+vector signed long long vec_reve (vector signed long long);
+vector unsigned long long vec_reve (vector unsigned long long);
+vector bool short vec_reve (vector bool short);
+vector signed short vec_reve (vector signed short);
+vector unsigned short vec_reve (vector unsigned short);
+
vector signed char vec_rl (vector signed char,
vector unsigned char);
vector unsigned char vec_rl (vector unsigned char,
new file mode 100644
@@ -0,0 +1,251 @@
+/* { dg-do run { target { powerpc*-*-linux* } } } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-O2 -mvsx -mcpu=power7" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* vsx_hw only proves VSX can execute, so build for power7, not newer.  */
+#include <altivec.h> // vector
+
+/* Tags recording which member of union vector_value is in use.  Each
+   test case in main stores one in vector_struct.vector_id, and
+   test_results switches on it to pick the member to compare.  */
+enum
+{
+  VBC = 0, VSC = 1, VUC = 2,    /* bool/signed/unsigned char */
+  VBS = 3, VSS = 4, VUS = 5,    /* bool/signed/unsigned short */
+  VBI = 6, VI = 7, VUI = 8,     /* bool/signed/unsigned int */
+  VLLB = 9,                     /* bool long long */
+  VLLI = 10,                    /* signed long long */
+  VLLUI = 11,                   /* unsigned long long */
+  VF = 12,                      /* float */
+  VD = 13                       /* double */
+};
+
+union vector_value	/* One vector viewed as each type vec_reve is tested with.  */
+{
+ vector bool char vbc;	/* compared when vector_id is VBC */
+ vector signed char vsc;	/* VSC */
+ vector unsigned char vuc;	/* VUC */
+ vector bool short vbs;	/* VBS */
+ vector signed short vss;	/* VSS */
+ vector unsigned short vus;	/* VUS */
+ vector bool int vbi;	/* VBI */
+ vector signed int vi;	/* VI */
+ vector unsigned int vui;	/* VUI */
+ vector bool long long vllb;	/* VLLB */
+ vector long long signed int vlli;	/* VLLI */
+ vector long long unsigned int vllui;	/* VLLUI */
+ vector float vf;	/* VF */
+ vector double vd;	/* VD */
+} vec_element;	/* NOTE(review): vec_element is not referenced in this test.  */
+
+struct vector_struct	/* A vector plus the metadata test_results checks.  */
+{
+ int vector_id;	/* tag (VBC ... VD) selecting the member of vec to compare */
+ int element_size; // size in bytes of one element; test_results uses 16 / element_size as the element count
+ union vector_value vec;	/* the vector value itself */
+} vec;	/* NOTE(review): this global is not referenced in this test.  */
+
+void abort (void);
+
+/* Abort unless VEC_RESULT matches VEC_EXPECTED.  The element_size and
+   vector_id fields must agree, and then each of the 16 / element_size
+   elements must compare equal when read through the union member that
+   vector_id selects; the default case aborts on an unknown vector_id.  */
+void test_results(struct vector_struct *vec_result,
+                  struct vector_struct *vec_expected)
+{
+  union vector_value *got = &vec_result->vec;
+  union vector_value *want = &vec_expected->vec;
+  int count;
+  int idx;
+
+  if (vec_result->element_size != vec_expected->element_size
+      || vec_result->vector_id != vec_expected->vector_id)
+    abort ();
+
+  /* Number of elements to compare.  */
+  count = 16 / vec_result->element_size;
+
+  for (idx = 0; idx < count; idx++)
+    {
+      switch (vec_result->vector_id)
+        {
+        case VBC:
+          if (got->vbc[idx] != want->vbc[idx])
+            abort ();
+          break;
+        case VSC:
+          if (got->vsc[idx] != want->vsc[idx])
+            abort ();
+          break;
+        case VUC:
+          if (got->vuc[idx] != want->vuc[idx])
+            abort ();
+          break;
+
+        case VBS:
+          if (got->vbs[idx] != want->vbs[idx])
+            abort ();
+          break;
+        case VSS:
+          if (got->vss[idx] != want->vss[idx])
+            abort ();
+          break;
+        case VUS:
+          if (got->vus[idx] != want->vus[idx])
+            abort ();
+          break;
+
+        case VBI:
+          if (got->vbi[idx] != want->vbi[idx])
+            abort ();
+          break;
+        case VI:
+          if (got->vi[idx] != want->vi[idx])
+            abort ();
+          break;
+        case VUI:
+          if (got->vui[idx] != want->vui[idx])
+            abort ();
+          break;
+
+        case VLLB:
+          if (got->vllb[idx] != want->vllb[idx])
+            abort ();
+          break;
+        case VLLI:
+          if (got->vlli[idx] != want->vlli[idx])
+            abort ();
+          break;
+        case VLLUI:
+          if (got->vllui[idx] != want->vllui[idx])
+            abort ();
+          break;
+
+        case VF:
+          if (got->vf[idx] != want->vf[idx])
+            abort ();
+          break;
+        case VD:
+          if (got->vd[idx] != want->vd[idx])
+            abort ();
+          break;
+
+        default:
+          abort ();
+        }
+    }
+}
+
+/* Check vec_reve on every member type of union vector_value.  */
+int main (void)
+{
+  struct vector_struct vec_src, vec_expected, vec_result;
+
+  vec_src.vec.vbc = (vector bool char){ 0, 1, 0, 0, 1, 1, 0, 0,
+                                        0, 1, 1, 1, 0, 0, 0, 0 };
+  vec_expected.vec.vbc = (vector bool char){ 0, 0, 0, 0, 1, 1, 1, 0,
+                                             0, 0, 1, 1, 0, 0, 1, 0 };
+  vec_result.element_size = vec_expected.element_size = 1;
+  vec_result.vector_id = vec_expected.vector_id = VBC;
+  vec_result.vec.vbc = vec_reve (vec_src.vec.vbc);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vsc = (vector signed char){ 0, 1, -2, -3, 4, 5, -6, -7, 8,
+                                          9, -10, -11, 12, 13, -14, -15 };
+  vec_expected.vec.vsc = (vector signed char){ -15, -14, 13, 12, -11, -10,
+                                               9, 8, -7, -6, 5, 4, -3, -2,
+                                               1, 0 };
+  vec_result.element_size = vec_expected.element_size = 1;
+  vec_result.vector_id = vec_expected.vector_id = VSC;
+  vec_result.vec.vsc = vec_reve (vec_src.vec.vsc);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vuc = (vector unsigned char){ 10, 11, 12, 13, 14, 15, 16, 17,
+                                            18, 19, 20, 21, 22, 23, 24, 25 };
+  vec_expected.vec.vuc = (vector unsigned char){ 25, 24, 23, 22, 21, 20,
+                                                 19, 18, 17, 16, 15, 14, 13,
+                                                 12, 11, 10 };
+  vec_result.element_size = vec_expected.element_size = 1;
+  vec_result.vector_id = vec_expected.vector_id = VUC;
+  vec_result.vec.vuc = vec_reve (vec_src.vec.vuc);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vbs = (vector bool short){ 0, 0, 1, 1, 0, 1, 0, 1 };
+  vec_expected.vec.vbs = (vector bool short){ 1, 0, 1, 0, 1, 1, 0, 0 };
+  vec_result.element_size = vec_expected.element_size = 2;
+  vec_result.vector_id = vec_expected.vector_id = VBS;
+  vec_result.vec.vbs = vec_reve (vec_src.vec.vbs);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vss = (vector signed short){ -1, -2, 3, 4, -5, -6, 7, 8 };
+  vec_expected.vec.vss = (vector signed short){ 8, 7, -6, -5, 4, 3, -2, -1 };
+  vec_result.element_size = vec_expected.element_size = 2;
+  vec_result.vector_id = vec_expected.vector_id = VSS;
+  vec_result.vec.vss = vec_reve (vec_src.vec.vss);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vus = (vector unsigned short){ 11, 22, 33, 44, 55, 66, 77, 88 };
+  vec_expected.vec.vus = (vector unsigned short){ 88, 77, 66, 55,
+                                                  44, 33, 22, 11 };
+  vec_result.element_size = vec_expected.element_size = 2;
+  vec_result.vector_id = vec_expected.vector_id = VUS;
+  vec_result.vec.vus = vec_reve (vec_src.vec.vus);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vbi = (vector bool int){ 0, 1, 1, 1 };
+  vec_expected.vec.vbi = (vector bool int){ 1, 1, 1, 0 };
+  vec_result.element_size = vec_expected.element_size = 4;
+  vec_result.vector_id = vec_expected.vector_id = VBI;
+  vec_result.vec.vbi = vec_reve (vec_src.vec.vbi);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vi = (vector signed int){ -1, 3, -5, 1234567 };
+  vec_expected.vec.vi = (vector signed int){1234567, -5, 3, -1};
+  vec_result.element_size = vec_expected.element_size = 4;
+  vec_result.vector_id = vec_expected.vector_id = VI;
+  vec_result.vec.vi = vec_reve (vec_src.vec.vi);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vui = (vector unsigned int){ 9, 11, 15, 2468013579 };
+  vec_expected.vec.vui = (vector unsigned int){2468013579, 15, 11, 9};
+  vec_result.element_size = vec_expected.element_size = 4;
+  vec_result.vector_id = vec_expected.vector_id = VUI;
+  vec_result.vec.vui = vec_reve (vec_src.vec.vui);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vllb = (vector bool long long ){ 0, 1 };
+  vec_expected.vec.vllb = (vector bool long long){1, 0};
+  vec_result.element_size = vec_expected.element_size = 8;
+  vec_result.vector_id = vec_expected.vector_id = VLLB;
+  vec_result.vec.vllb = vec_reve (vec_src.vec.vllb);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vlli = (vector long long int){ -12, -12345678901234 };
+  vec_expected.vec.vlli = (vector long long int){-12345678901234, -12};
+  vec_result.element_size = vec_expected.element_size = 8;
+  vec_result.vector_id = vec_expected.vector_id = VLLI;
+  vec_result.vec.vlli = vec_reve (vec_src.vec.vlli);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vllui = (vector unsigned long long int){ 102, 9753108642 };
+  vec_expected.vec.vllui = (vector unsigned long long int){9753108642, 102};
+  vec_result.element_size = vec_expected.element_size = 8;
+  vec_result.vector_id = vec_expected.vector_id = VLLUI;
+  vec_result.vec.vllui = vec_reve (vec_src.vec.vllui);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vf = (vector float){ -21., 3.5, -53., 78. };
+  vec_expected.vec.vf = (vector float){78., -53, 3.5, -21};
+  vec_result.element_size = vec_expected.element_size = 4;
+  vec_result.vector_id = vec_expected.vector_id = VF;
+  vec_result.vec.vf = vec_reve (vec_src.vec.vf);
+  test_results (&vec_result, &vec_expected);
+
+  vec_src.vec.vd = (vector double){ 34.0, 97.0 };
+  vec_expected.vec.vd = (vector double){97.0, 34.0};
+  vec_result.element_size = vec_expected.element_size = 8;
+  vec_result.vector_id = vec_expected.vector_id = VD;
+  vec_result.vec.vd = vec_reve (vec_src.vec.vd);
+  test_results (&vec_result, &vec_expected);
+}