===================================================================
@@ -6141,6 +6141,32 @@ minus or complement operators on a vecto
elements are the negative or complemented values of the corresponding
elements in the operand.
+Vector shuffling is available using functions
+@code{__builtin_shuffle (vec, mask)} and
+@code{__builtin_shuffle (vec0, vec1, mask)}. Both functions construct
+a permutation of elements from one or two vectors and return a vector
+of the same type as input vector(s). The mask is a vector of
+integer-typed elements. The size of each element of the mask must be
+the same as the size of each input vector element. The number of
+elements in input vector(s) and mask must be the same.
+
+The elements of the input vectors are numbered from left to right across
+one or both of the vectors. Each element in the mask specifies the
+index of an element in the input vector(s). Consider the following example.
+
+@smallexample
+typedef int v4si __attribute__ ((vector_size (16)));
+
+v4si a = @{1,2,3,4@};
+v4si b = @{5,6,7,8@};
+v4si mask1 = @{0,1,1,3@};
+v4si mask2 = @{0,4,2,5@};
+v4si res;
+
+res = __builtin_shuffle (a, mask1); /* res is @{1,2,2,4@} */
+res = __builtin_shuffle (a, b, mask2); /* res is @{1,5,3,6@} */
+@end smallexample
+
You can declare variables and use them in function calls and returns, as
well as in assignments and some casts. You can specify a vector type as
a return type for a function. Vector types can also be used as function
===================================================================
@@ -5710,6 +5710,18 @@ misalignment value (@var{misalign}).
Return true if vector alignment is reachable (by peeling N iterations) for the given type.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_BUILTIN_SHUFFLE (gimple_stmt_iterator *@var{gsi}, tree @var{vec0}, tree @var{mask})
+This hook should determine whether vector shuffling is possible with hardware instructions
+and replace the statement pointed to by @var{gsi} with the appropriate hardware-specific
+instructions. If no hardware expansion is available, the hook returns false.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_BUILTIN_SHUFFLE2 (gimple_stmt_iterator *@var{gsi}, tree @var{vec0}, tree @var{vec1}, tree @var{mask})
+This hook should determine whether vector shuffling is possible with hardware instructions
+and replace the statement pointed to by @var{gsi} with the appropriate hardware-specific
+instructions. If no hardware expansion is available, the hook returns false.
+@end deftypefn
+
@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VEC_PERM (tree @var{type}, tree *@var{mask_element_type})
Target builtin that implements vector permute.
@end deftypefn
===================================================================
@@ -5710,6 +5710,18 @@ misalignment value (@var{misalign}).
Return true if vector alignment is reachable (by peeling N iterations) for the given type.
@end deftypefn
+@hook TARGET_VECTORIZE_BUILTIN_SHUFFLE
+This hook should determine whether vector shuffling is possible with hardware instructions
+and replace the statement pointed to by @var{gsi} with the appropriate hardware-specific
+instructions. If no hardware expansion is available, the hook returns false.
+@end deftypefn
+
+@hook TARGET_VECTORIZE_BUILTIN_SHUFFLE2
+This hook should determine whether vector shuffling is possible with hardware instructions
+and replace the statement pointed to by @var{gsi} with the appropriate hardware-specific
+instructions. If no hardware expansion is available, the hook returns false.
+@end deftypefn
+
@hook TARGET_VECTORIZE_BUILTIN_VEC_PERM
Target builtin that implements vector permute.
@end deftypefn
===================================================================
@@ -472,6 +472,26 @@ default_builtin_vectorized_function (tre
return NULL_TREE;
}
+
+/* Vector shuffling functions */
+
+bool
+default_builtin_shuffle (gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
+ tree vec0 ATTRIBUTE_UNUSED,
+ tree mask ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
+bool
+default_builtin_shuffle2 (gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
+ tree vec0 ATTRIBUTE_UNUSED,
+ tree vec1 ATTRIBUTE_UNUSED,
+ tree mask ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
/* Vectorized conversion. */
tree
===================================================================
@@ -75,6 +75,12 @@ extern const char * default_invalid_with
extern tree default_builtin_vectorized_function (tree, tree, tree);
+extern bool default_builtin_shuffle (gimple_stmt_iterator *gsi, tree vec0,
+ tree mask);
+
+extern bool default_builtin_shuffle2 (gimple_stmt_iterator *gsi, tree vec0,
+ tree vec1, tree mask);
+
extern tree default_builtin_vectorized_conversion (unsigned int, tree, tree);
extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
===================================================================
@@ -829,6 +829,23 @@ DEFHOOK
"",
tree, (tree type, tree *mask_element_type), NULL)
+/* Target built-in that implements vector shuffling, or returns
+ false if not available. */
+DEFHOOK
+(builtin_shuffle,
+ "",
+ bool, (gimple_stmt_iterator *gsi, tree vec0, tree mask),
+ default_builtin_shuffle)
+
+/* Target built-in that implements two vector shuffling, or returns
+ false if not available. */
+DEFHOOK
+(builtin_shuffle2,
+ "",
+ bool, (gimple_stmt_iterator *gsi, tree vec0, tree vec1, tree mask),
+ default_builtin_shuffle2)
+
+
/* Return true if a vector created for builtin_vec_perm is valid. */
DEFHOOK
(builtin_vec_perm_ok,
===================================================================
@@ -1358,6 +1358,27 @@ build_vector_from_ctor (tree type, VEC(c
return build_vector (type, nreverse (list));
}
+/* Build a vector of type VECTYPE where all the elements are SCs. */
+tree
+build_vector_from_val (const tree sc, const tree vectype)
+{
+ tree t = NULL_TREE;
+ int i, nunits = TYPE_VECTOR_SUBPARTS (vectype);
+
+ if (sc == error_mark_node)
+ return sc;
+
+ gcc_assert (TREE_TYPE (sc) == TREE_TYPE (vectype));
+
+ for (i = 0; i < nunits; ++i)
+ t = tree_cons (NULL_TREE, sc, t);
+
+ if (CONSTANT_CLASS_P (sc))
+ return build_vector (vectype, t);
+ else
+ return build_constructor_from_list (vectype, t);
+}
+
/* Return a new CONSTRUCTOR node whose type is TYPE and whose values
are in the VEC pointed to by VALS. */
tree
===================================================================
@@ -4027,6 +4027,7 @@ extern tree build_int_cst_type (tree, HO
extern tree build_int_cst_wide (tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT);
extern tree build_vector (tree, tree);
extern tree build_vector_from_ctor (tree, VEC(constructor_elt,gc) *);
+extern tree build_vector_from_val (const tree, const tree);
extern tree build_constructor (tree, VEC(constructor_elt,gc) *);
extern tree build_constructor_single (tree, tree, tree);
extern tree build_constructor_from_list (tree, tree);
===================================================================
@@ -51,6 +51,7 @@
#include "tm.h"
#include "insn-modes.h"
+#include "gimple.h"
/* Types used by the record_gcc_switches() target function. */
typedef enum
===================================================================
@@ -0,0 +1,43 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define shuf2compare(type, count, vres, v0, v1, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != ((vidx(type, mask, __i) < count) ? \
+ vidx(type, v0, vidx(type, mask, __i)) : \
+ vidx(type, v1, (vidx(type, mask, __i) - count)))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {argc, 1,2,3,4,5,6,7};
+ vector (8, short) v1 = {argc, 1,argc,3,4,5,argc,7};
+ vector (8, short) v2;
+
+ //vector (8, short) mask = {1,2,5,4,3,6,7};
+
+ vector (8, short) mask0 = {0,2,3,1,4,5,6,7};
+ vector (8, short) mask1 = {0,12,3,4,3,0,10,9};
+ vector (8, short) mask2 = {0,8,1,9,2,10,3,11};
+
+ v2 = __builtin_shuffle (v0, v1, mask0);
+ shuf2compare (short, 8, v2, v0, v1, mask0);
+
+ v2 = __builtin_shuffle (v0, v1, mask1);
+ shuf2compare (short, 8, v2, v0, v1, mask1);
+
+ v2 = __builtin_shuffle (v0, v1, mask2);
+ shuf2compare (short, 8, v2, v0, v1, mask2);
+
+ v2 = __builtin_shuffle (mask0, mask0, v0);
+ shuf2compare (short, 8, v2, mask0, mask0, v0);
+
+ return 0;
+}
+
===================================================================
@@ -0,0 +1,50 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define shuf2compare(type, count, vres, v0, v1, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != ((vidx(type, mask, __i) < count) ? \
+ vidx(type, v0, vidx(type, mask, __i)) : \
+ vidx(type, v1, (vidx(type, mask, __i) - count)))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+vector (8, short) __attribute__ ((noinline))
+f (vector (8, short) x, vector (8, short) y, vector (8, short) mask) {
+ return __builtin_shuffle (x, y, mask);
+}
+
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {argc, 1,2,3,4,5,6,7};
+ vector (8, short) v1 = {argc, 1,argc,3,4,5,argc,7};
+ vector (8, short) v2;
+
+ //vector (8, short) mask = {1,2,5,4,3,6,7};
+
+ vector (8, short) mask0 = {0,2,3,1,4,5,6,7};
+ vector (8, short) mask1 = {0,12,3,4,3,0,10,9};
+ vector (8, short) mask2 = {0,8,1,9,2,10,3,11};
+
+ v2 = f (v0, v1, mask0);
+ shuf2compare (short, 8, v2, v0, v1, mask0);
+
+ v2 = f (v0, v1, mask1);
+ shuf2compare (short, 8, v2, v0, v1, mask1);
+
+ v2 = f (v0, v1, mask2);
+ shuf2compare (short, 8, v2, v0, v1, mask2);
+
+ v2 = f (mask0, mask0, v0);
+ shuf2compare (short, 8, v2, mask0, mask0, v0);
+
+ return 0;
+}
+
===================================================================
@@ -0,0 +1,46 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define shufcompare(type, count, vres, v0, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != vidx(type, v0, vidx(type, mask, __i))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+
+int main (int argc, char *argv[]) {
+ /*vector (8, short) v0 = {argc, 1,2,3,4,5,6,7};
+ vector (8, short) v1 = {argc, 1,argc,3,4,5,argc,7};
+ vector (8, short) v2;
+
+ vector (8, short) smask = {0,0,1,2,3,4,5,6};
+
+ v2 = __builtin_shuffle (v0, smask);
+ shufcompare (short, 8, v2, v0, smask);
+ v2 = __builtin_shuffle (v0, v1);
+ shufcompare (short, 8, v2, v0, v1);
+ v2 = __builtin_shuffle (smask, v0);
+ shufcompare (short, 8, v2, smask, v0);*/
+
+ vector (4, int) i0 = {argc, 1,2,3};
+ vector (4, int) i1 = {argc, 1, argc, 3};
+ vector (4, int) i2;
+
+ vector (4, int) imask = {0,3,2,1};
+
+ /*i2 = __builtin_shuffle (i0, imask);
+ shufcompare (int, 4, i2, i0, imask);*/
+ i2 = __builtin_shuffle (i0, i1);
+ shufcompare (int, 4, i2, i0, i1);
+
+ i2 = __builtin_shuffle (imask, i0);
+ shufcompare (int, 4, i2, imask, i0);
+
+ return 0;
+}
+
===================================================================
@@ -0,0 +1,36 @@
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define vidx(type, vec, idx) (*(((type *) &(vec)) + idx))
+
+#define shufcompare(type, count, vres, v0, mask) \
+do { \
+ int __i; \
+ for (__i = 0; __i < count; __i++) { \
+ if (vidx(type, vres, __i) != vidx(type, v0, vidx(type, mask, __i))) \
+ __builtin_abort (); \
+ } \
+} while (0)
+
+vector (8, short) __attribute__ ((noinline))
+f (vector (8, short) x, vector (8, short) mask) {
+ return __builtin_shuffle (x, mask);
+}
+
+
+int main (int argc, char *argv[]) {
+ vector (8, short) v0 = {argc, 1,2,3,4,5,6,7};
+ vector (8, short) v1 = {argc, 1,argc,3,4,5,argc,7};
+ vector (8, short) v2;
+
+ vector (8, short) mask = {0,0,1,2,3,4,5,6};
+
+ v2 = f (v0, mask);
+ shufcompare (short, 8, v2, v0, mask);
+
+ v2 = f (v0, v1);
+ shufcompare (short, 8, v2, v0, v1);
+
+ return 0;
+}
+
===================================================================
@@ -708,6 +708,8 @@ DEF_GCC_BUILTIN (BUILT_IN_VA_ARG_
DEF_EXT_LIB_BUILTIN (BUILT_IN__EXIT, "_exit", BT_FN_VOID_INT, ATTR_NORETURN_NOTHROW_LIST)
DEF_C99_BUILTIN (BUILT_IN__EXIT2, "_Exit", BT_FN_VOID_INT, ATTR_NORETURN_NOTHROW_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_SHUFFLE, "shuffle", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC)
+
/* Implementing nested functions. */
DEF_BUILTIN_STUB (BUILT_IN_INIT_TRAMPOLINE, "__builtin_init_trampoline")
DEF_BUILTIN_STUB (BUILT_IN_ADJUST_TRAMPOLINE, "__builtin_adjust_trampoline")
===================================================================
@@ -2794,6 +2794,68 @@ build_function_call_vec (location_t loc,
&& !check_builtin_function_arguments (fundecl, nargs, argarray))
return error_mark_node;
+ /* Typecheck a builtin function which is declared with variable
+ argument list. */
+ if (fundecl && DECL_BUILT_IN (fundecl)
+ && DECL_BUILT_IN_CLASS (fundecl) == BUILT_IN_NORMAL)
+ {
+ enum built_in_function fcode = DECL_FUNCTION_CODE (fundecl);
+ if (fcode == BUILT_IN_SHUFFLE)
+ {
+ tree firstarg = VEC_index (tree, params, 0);
+ tree mask = VEC_index (tree, params, nargs - 1);
+
+ if (nargs != 2 && nargs != 3)
+ {
+ error_at (loc, "__builtin_shuffle accepts 2 or 3 arguments");
+ return error_mark_node;
+ }
+
+ if (TREE_CODE (TREE_TYPE (mask)) != VECTOR_TYPE
+ || TREE_CODE (TREE_TYPE (TREE_TYPE (mask))) != INTEGER_TYPE)
+ {
+ error_at (loc, "__builtin_shuffle last argument must "
+ "be an integer vector");
+ return error_mark_node;
+ }
+
+ if (TREE_CODE (TREE_TYPE (firstarg)) != VECTOR_TYPE
+ || (nargs == 3
+ && TREE_CODE (TREE_TYPE (VEC_index (tree, params, 1)))
+ != VECTOR_TYPE))
+ {
+ error_at (loc, "__builtin_shuffle arguments must be vectors");
+ return error_mark_node;
+ }
+
+ if ((TYPE_VECTOR_SUBPARTS (TREE_TYPE (firstarg))
+ != TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask)))
+ || (nargs == 3
+ && TYPE_VECTOR_SUBPARTS (
+ TREE_TYPE (VEC_index (tree, params, 1)))
+ != TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask))))
+ {
+ error_at (loc, "__builtin_shuffle number of elements of the "
+ "argument vector(s) and the mask vector should "
+ "be the same");
+ return error_mark_node;
+ }
+
+ /* Here we change the return type of the builtin function
+ from int f(...) --> t f(...) where t is a type of the
+ first argument. */
+ fundecl = copy_node (fundecl);
+ TREE_TYPE (fundecl) = build_function_type (TREE_TYPE (firstarg),
+ TYPE_ARG_TYPES (TREE_TYPE (fundecl)));
+ function = build_fold_addr_expr (fundecl);
+ result = build_call_array_loc (loc, TREE_TYPE (firstarg),
+ function, nargs, argarray);
+ return require_complete_type (result);
+ }
+ }
+
+
+
/* Check that the arguments to the function are valid. */
check_function_arguments (TYPE_ATTRIBUTES (fntype), nargs, argarray,
TYPE_ARG_TYPES (fntype));
@@ -6005,10 +6067,17 @@ digest_init (location_t init_loc, tree t
tree value;
bool constant_p = true;
- /* Iterate through elements and check if all constructor
+ /* If the constructor has fewer elements than the vector type. */
+ if (CONSTRUCTOR_NELTS (inside_init)
+ < TYPE_VECTOR_SUBPARTS (TREE_TYPE (inside_init)))
+ warning_at (init_loc, 0, "vector length does not match "
+ "initializer length, zero elements "
+ "will be inserted");
+
+ /* Iterate through elements and check if all constructor
elements are *_CSTs. */
FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (inside_init), ix, value)
- if (!CONSTANT_CLASS_P (value))
+ if (!CONSTANT_CLASS_P (value))
{
constant_p = false;
break;
===================================================================
@@ -30,6 +30,8 @@ along with GCC; see the file COPYING3.
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
+#include "target.h"
+#include "diagnostic.h"
/* Need to include rtl.h, expr.h, etc. for optabs. */
#include "expr.h"
@@ -383,6 +385,277 @@ type_for_widest_vector_mode (enum machin
}
}
+
+/* Build a reference to the element of the vector VECT. Function
+ returns either the element itself, a BIT_FIELD_REF, or an
+ ARRAY_REF expression.
+
+ GSI is required to insert temporary variables while building a
+ reference to the element of the vector VECT.
+
+ PTMPVEC is a pointer to the temporary variable for caching
+ purposes. In case PTMPVEC is NULL, a new temporary variable
+ will be created. */
+static tree
+vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
+{
+ tree type;
+ gimple asgn;
+ unsigned HOST_WIDE_INT maxval;
+ tree tmpvec;
+ tree indextype, arraytype;
+ bool need_asgn = true;
+
+ gcc_assert (TREE_CODE (TREE_TYPE (vect)) == VECTOR_TYPE);
+
+ type = TREE_TYPE (vect);
+ if (TREE_CODE (idx) == INTEGER_CST)
+ {
+ unsigned HOST_WIDE_INT index;
+
+ if (!host_integerp (idx, 1)
+ || (index = tree_low_cst (idx, 1)) > TYPE_VECTOR_SUBPARTS (type)-1)
+ return error_mark_node;
+
+ if (TREE_CODE (vect) == VECTOR_CST)
+ {
+ unsigned i;
+ tree vals = TREE_VECTOR_CST_ELTS (vect);
+ for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
+ if (i == index)
+ return TREE_VALUE (vals);
+ return error_mark_node;
+ }
+ else if (TREE_CODE (vect) == CONSTRUCTOR)
+ {
+ unsigned i;
+ VEC (constructor_elt, gc) *vals = CONSTRUCTOR_ELTS (vect);
+ constructor_elt *elt;
+
+ for (i = 0; VEC_iterate (constructor_elt, vals, i, elt); i++)
+ if (operand_equal_p (elt->index, idx, 0))
+ return elt->value;
+ return fold_convert (TREE_TYPE (type), integer_zero_node);
+ }
+ else if (TREE_CODE (vect) == SSA_NAME)
+ {
+ tree el;
+ gimple vectdef = SSA_NAME_DEF_STMT (vect);
+ if (gimple_assign_single_p (vectdef)
+ && (el = vector_element (gsi, gimple_assign_rhs1 (vectdef),
+ idx, ptmpvec))
+ != error_mark_node)
+ return el;
+ else
+ {
+ tree size = TYPE_SIZE (TREE_TYPE (type));
+ tree pos = fold_build2 (MULT_EXPR, TREE_TYPE (idx),
+ idx, size);
+ return fold_build3 (BIT_FIELD_REF, TREE_TYPE (type),
+ vect, size, pos);
+ }
+ }
+ else
+ return error_mark_node;
+ }
+
+ if (!ptmpvec)
+ tmpvec = create_tmp_var (TREE_TYPE (vect), "vectmp");
+ else if (!*ptmpvec)
+ tmpvec = *ptmpvec = create_tmp_var (TREE_TYPE (vect), "vectmp");
+ else
+ {
+ tmpvec = *ptmpvec;
+ need_asgn = false;
+ }
+
+ if (need_asgn)
+ {
+ TREE_ADDRESSABLE (tmpvec) = 1;
+ asgn = gimple_build_assign (tmpvec, vect);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+ }
+
+ maxval = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vect)) -1;
+ indextype = build_index_type (size_int (maxval));
+ arraytype = build_array_type (TREE_TYPE (type), indextype);
+
+ return build4 (ARRAY_REF, TREE_TYPE (type),
+ build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
+ idx, NULL_TREE, NULL_TREE);
+
+
+}
+
+/* Lower built-in vector shuffle function. Function can have two or
+ three arguments.
+ When function has two arguments: __builtin_shuffle (v0, mask),
+ the lowered version would be {v0[mask[0]], v0[mask[1]], ...}
+ MASK and V0 must have the same number of elements.
+
+ In case of three arguments: __builtin_shuffle (v0, v1, mask)
+ the lowered version would be:
+ {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
+ V0 and V1 must have the same type. MASK, V0, V1 must have the
+ same number of elements. */
+static void
+lower_builtin_shuffle (gimple_stmt_iterator *gsi, location_t loc)
+{
+#define TRAP_RETURN(new_stmt, stmt, gsi, vec0) \
+do { \
+ new_stmt = gimple_build_call (built_in_decls[BUILT_IN_TRAP], 0); \
+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); \
+ split_block (gimple_bb (new_stmt), new_stmt); \
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), vec0); \
+ gsi_replace (gsi, new_stmt, false); \
+ return; \
+} while (0)
+
+ gimple stmt = gsi_stmt (*gsi);
+ unsigned numargs = gimple_call_num_args (stmt);
+ tree mask = gimple_call_arg (stmt, numargs - 1);
+ tree vec0 = gimple_call_arg (stmt, 0);
+ unsigned els = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask));
+ tree type0 = TREE_TYPE (TREE_TYPE (vec0));
+ VEC(constructor_elt,gc) *v = NULL;
+ tree vectype, constr;
+ gimple new_stmt;
+ tree vec0tmp = NULL_TREE, masktmp = NULL_TREE;
+
+ if (numargs == 2)
+ {
+ unsigned i;
+ tree vec0tmp = NULL_TREE;
+
+ if (targetm.vectorize.builtin_shuffle (gsi, vec0, mask))
+ {
+ /* Built-in is expanded by target. */
+ return ;
+ }
+
+ v = VEC_alloc (constructor_elt, gc, els);
+ for (i = 0; i < els; i++)
+ {
+ tree idxval, vecel, t;
+ if ((idxval = vector_element (gsi, mask, size_int (i), &masktmp))
+ == error_mark_node)
+ {
+ warning_at (loc, 0, "Invalid shuffling mask index %i", i);
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ if ((vecel = vector_element (gsi, vec0, idxval, &vec0tmp))
+ == error_mark_node)
+ {
+ warning_at (loc, 0, "Invalid shuffling arguments");
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ t = force_gimple_operand_gsi (gsi, vecel, true,
+ NULL_TREE, true, GSI_SAME_STMT);
+ CONSTRUCTOR_APPEND_ELT (v, size_int (i), t);
+ }
+ }
+ else if (numargs == 3)
+ {
+ unsigned i;
+ tree vec1 = gimple_call_arg (stmt, 1);
+ tree var = create_tmp_var (type0, "vecel");
+ tree vec1tmp = NULL_TREE;
+
+ if (targetm.vectorize.builtin_shuffle2 (gsi, vec0, vec1, mask))
+ {
+ /* Built-in is expanded by target. */
+ return ;
+ }
+
+ v = VEC_alloc (constructor_elt, gc, els);
+ for (i = 0; i < els; i++)
+ {
+ tree idxval, idx1val, cond, elval0, elval1, condexpr, t, ssatmp;
+ tree vec0el, vec1el;
+ gimple asgn;
+
+ if ((idxval = vector_element (gsi, mask, size_int (i), &masktmp))
+ == error_mark_node)
+ {
+ warning_at (loc, 0, "Invalid shuffling mask index %i", i);
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ if (TREE_CODE (idxval) == INTEGER_CST)
+ {
+ if (tree_int_cst_lt (idxval, size_int (els)))
+ {
+ vec0el = vector_element (gsi, vec0, idxval, &vec0tmp);
+ t = force_gimple_operand_gsi (gsi, vec0el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ }
+ else if (tree_int_cst_lt (idxval, size_int (2*els)))
+ {
+ idx1val = fold_build2 (MINUS_EXPR, TREE_TYPE (idxval),
+ idxval, build_int_cst (TREE_TYPE (idxval), els));
+
+ vec1el = vector_element (gsi, vec1, idx1val, &vec1tmp);
+ t = force_gimple_operand_gsi (gsi, vec1el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ }
+ else
+ {
+ warning_at (loc, 0, "Invalid shuffling mask index %i", i);
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+ }
+ else
+ {
+
+ idx1val = fold_build2 (MINUS_EXPR, TREE_TYPE (idxval),
+ idxval, build_int_cst (TREE_TYPE (idxval), els));
+ idx1val = force_gimple_operand_gsi (gsi, idx1val,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ cond = build2 (GT_EXPR, boolean_type_node, \
+ idxval, convert (type0, size_int (els - 1)));
+
+ if ((vec0el = vector_element (gsi, vec0, idxval, &vec0tmp))
+ == error_mark_node)
+ {
+ warning_at (loc, 0, "Invalid shuffling arguments");
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+ elval0 = force_gimple_operand_gsi (gsi, vec0el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+
+ if ((vec1el = vector_element (gsi, vec1, idx1val, &vec1tmp))
+ == error_mark_node)
+ {
+ warning_at (loc, 0, "Invalid shuffling arguments");
+ TRAP_RETURN (new_stmt, stmt, gsi, vec0);
+ }
+
+ elval1 = force_gimple_operand_gsi (gsi, vec1el,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+
+ condexpr = fold_build3 (COND_EXPR, type0, cond, \
+ elval1, elval0);
+
+ t = force_gimple_operand_gsi (gsi, condexpr, true, \
+ NULL_TREE, true, GSI_SAME_STMT);
+ }
+
+ asgn = gimple_build_assign (var, t);
+ ssatmp = make_ssa_name (var, asgn);
+ gimple_assign_set_lhs (asgn, ssatmp);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+ CONSTRUCTOR_APPEND_ELT (v, size_int (i), ssatmp);
+ }
+ }
+
+ vectype = build_vector_type (type0, els);
+ constr = build_constructor (vectype, v);
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), constr);
+ gsi_replace (gsi, new_stmt, false);
+}
+
/* Process one statement. If we identify a vector operation, expand it. */
static void
@@ -396,6 +669,12 @@ expand_vector_operations_1 (gimple_stmt_
enum gimple_rhs_class rhs_class;
tree new_rhs;
+ if (gimple_call_builtin_p (stmt, BUILT_IN_SHUFFLE))
+ {
+ lower_builtin_shuffle (gsi, gimple_location (stmt));
+ gimple_set_modified (gsi_stmt (*gsi), true);
+ }
+
if (gimple_code (stmt) != GIMPLE_ASSIGN)
return;
@@ -521,10 +800,11 @@ expand_vector_operations_1 (gimple_stmt_
/* Use this to lower vector operations introduced by the vectorizer,
if it may need the bit-twiddling tricks implemented in this file. */
+
static bool
-gate_expand_vector_operations (void)
+gate_expand_vector_operations_noop (void)
{
- return flag_tree_vectorize != 0;
+ return optimize == 0;
}
static unsigned int
@@ -549,7 +829,7 @@ struct gimple_opt_pass pass_lower_vector
{
GIMPLE_PASS,
"veclower", /* name */
- 0, /* gate */
+ gate_expand_vector_operations_noop, /* gate */
expand_vector_operations, /* execute */
NULL, /* sub */
NULL, /* next */
@@ -559,8 +839,8 @@ struct gimple_opt_pass pass_lower_vector
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
- TODO_dump_func | TODO_ggc_collect
- | TODO_verify_stmts /* todo_flags_finish */
+ TODO_dump_func | TODO_update_ssa | TODO_ggc_collect
+ | TODO_verify_stmts | TODO_cleanup_cfg/* todo_flags_finish */
}
};
@@ -569,7 +849,7 @@ struct gimple_opt_pass pass_lower_vector
{
GIMPLE_PASS,
"veclower2", /* name */
- gate_expand_vector_operations, /* gate */
+ 0, /* gate */
expand_vector_operations, /* execute */
NULL, /* sub */
NULL, /* next */
@@ -582,6 +862,7 @@ struct gimple_opt_pass pass_lower_vector
TODO_dump_func | TODO_update_ssa /* todo_flags_finish */
| TODO_verify_ssa
| TODO_verify_stmts | TODO_verify_flow
+ | TODO_cleanup_cfg
}
};
===================================================================
@@ -864,7 +864,7 @@ endif
VEC_H = vec.h statistics.h
EXCEPT_H = except.h $(HASHTAB_H) vecprim.h vecir.h
TOPLEV_H = toplev.h $(INPUT_H) bversion.h $(DIAGNOSTIC_CORE_H)
-TARGET_H = $(TM_H) target.h target.def insn-modes.h
+TGT = $(TM_H) target.h target.def insn-modes.h
MACHMODE_H = machmode.h mode-classes.def insn-modes.h
HOOKS_H = hooks.h $(MACHMODE_H)
HOSTHOOKS_DEF_H = hosthooks-def.h $(HOOKS_H)
@@ -886,8 +886,9 @@ TREE_H = tree.h all-tree.def tree.def c-
REGSET_H = regset.h $(BITMAP_H) hard-reg-set.h
BASIC_BLOCK_H = basic-block.h $(PREDICT_H) $(VEC_H) $(FUNCTION_H) cfghooks.h
GIMPLE_H = gimple.h gimple.def gsstruct.def pointer-set.h $(VEC_H) \
- $(GGC_H) $(BASIC_BLOCK_H) $(TM_H) $(TARGET_H) tree-ssa-operands.h \
+ $(GGC_H) $(BASIC_BLOCK_H) $(TM_H) $(TGT) tree-ssa-operands.h \
tree-ssa-alias.h vecir.h
+TARGET_H = $(TGT) gimple.h
GCOV_IO_H = gcov-io.h gcov-iov.h auto-host.h
COVERAGE_H = coverage.h $(GCOV_IO_H)
DEMANGLE_H = $(srcdir)/../include/demangle.h
@@ -3156,7 +3157,7 @@ tree-vect-generic.o : tree-vect-generic.
$(TM_H) $(TREE_FLOW_H) $(GIMPLE_H) tree-iterator.h $(TREE_PASS_H) \
$(FLAGS_H) $(OPTABS_H) $(MACHMODE_H) $(EXPR_H) \
langhooks.h $(FLAGS_H) $(DIAGNOSTIC_H) gt-tree-vect-generic.h $(GGC_H) \
- coretypes.h insn-codes.h
+ coretypes.h insn-codes.h target.h diagnostic.h
df-core.o : df-core.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
===================================================================
@@ -735,7 +735,6 @@ init_optimization_passes (void)
NEXT_PASS (pass_refactor_eh);
NEXT_PASS (pass_lower_eh);
NEXT_PASS (pass_build_cfg);
- NEXT_PASS (pass_lower_vector);
NEXT_PASS (pass_warn_function_return);
NEXT_PASS (pass_build_cgraph_edges);
NEXT_PASS (pass_inline_parameters);
@@ -763,6 +762,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_referenced_vars);
NEXT_PASS (pass_build_ssa);
+ NEXT_PASS (pass_lower_vector);
NEXT_PASS (pass_early_warn_uninitialized);
/* Note that it is not strictly necessary to schedule an early
inline pass here. However, some test cases (e.g.,
@@ -915,7 +915,6 @@ init_optimization_passes (void)
NEXT_PASS (pass_vectorize);
{
struct opt_pass **p = &pass_vectorize.pass.sub;
- NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_dce_loop);
}
NEXT_PASS (pass_predcom);
@@ -926,6 +925,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_iv_optimize);
NEXT_PASS (pass_tree_loop_done);
}
+ NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_cse_reciprocals);
NEXT_PASS (pass_reassoc);
NEXT_PASS (pass_vrp);
===================================================================
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.
#include "tm.h"
#include "rtl.h"
#include "tree.h"
+#include "tree-flow.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
@@ -27855,6 +27856,9 @@ struct expand_vec_perm_d
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
+static int extract_vec_perm_cst (struct expand_vec_perm_d *, tree);
+static bool ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask);
+
/* Get a vector mode of the same size as the original but with elements
twice as wide. This is only guaranteed to apply to integral vectors. */
@@ -30222,6 +30226,182 @@ ix86_vectorize_builtin_vec_perm (tree ve
return ix86_builtins[(int) fcode];
}
+/* Lower shuffling the vector VEC0 as specified by MASK. Replaces the
+ statement at *GSI with a target specific sequence implementing the
+ shuffle operation and returns true. Returns false if no target
+ specific sequence for this shuffle operation exists. */
+static bool
+ix86_vectorize_builtin_shuffle (gimple_stmt_iterator *gsi,
+ tree vec0, tree mask)
+{
+ if (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (vec0))))
+ return false;
+
+ /* Recursively grab the definition of the variable. */
+ while (TREE_CODE (mask) == SSA_NAME)
+ {
+ gimple maskdef = SSA_NAME_DEF_STMT (mask);
+ if (gimple_assign_single_p (maskdef))
+ mask = gimple_assign_rhs1 (maskdef);
+ else
+ break;
+ }
+
+ if (TREE_CODE (mask) == VECTOR_CST)
+ {
+ tree t, m_type;
+ if (ix86_vectorize_builtin_vec_perm_ok (TREE_TYPE (vec0), mask))
+ {
+ t = ix86_vectorize_builtin_vec_perm (TREE_TYPE (vec0), &m_type);
+
+ if (t != NULL_TREE)
+ {
+ gimple c = gimple_build_call (t, 3, vec0, vec0, mask);
+ gimple stmt = gsi_stmt (*gsi);
+ gimple_call_set_lhs (c, gimple_call_lhs (stmt));
+ gsi_replace (gsi, c, false);
+ return true;
+ }
+ }
+ }
+ /* If we cannot expand it via vec_perm, we will try to expand it
+ via PSHUFB instruction. */
+ {
+ tree mtype = TREE_TYPE (mask);
+ unsigned HOST_WIDE_INT i = 1, w = TYPE_VECTOR_SUBPARTS (mtype);
+ tree mcst, c, m1;
+ tree mvar, m1var, t;
+ tree fntype;
+ gimple asgn;
+
+ if (tree_low_cst (TYPE_SIZE (mtype), 1) != 128)
+ return false;
+
+ if (!TARGET_SSE3 && !TARGET_AVX)
+ return false;
+
+ if (NULL_TREE == (fntype = ix86_builtins[(int) IX86_BUILTIN_PSHUFB128]))
+ return false;
+
+ mvar = create_tmp_var (mtype, "mask");
+ m1var = create_tmp_var (mtype, "nmask");
+
+ c = build_int_cst (TREE_TYPE (mtype), w-1);
+ mcst = build_vector_from_val (c, mtype);
+
+ /* mvar = mask & {w-1, w-1, w-1,...} */
+ m1 = build2 (BIT_AND_EXPR, mtype, mask, mcst);
+ t = force_gimple_operand_gsi (gsi, m1,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ asgn = gimple_build_assign (mvar, t);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+
+ /* m1var = mvar */
+ asgn = gimple_build_assign (m1var, t);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+
+ while (w != 16)
+ {
+ /* m1var = mm1var << 8*i */
+ m1 = build2 (LSHIFT_EXPR, mtype, m1var,
+ build_int_cst (TREE_TYPE (mtype), 8*i));
+ t = force_gimple_operand_gsi (gsi, m1,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ asgn = gimple_build_assign (m1var, t);
+ gsi_insert_before (gsi, asgn , GSI_SAME_STMT);
+
+ /* mvar = mvar | m1var */
+ m1 = build2 (BIT_IOR_EXPR, mtype, mvar, m1var);
+ t = force_gimple_operand_gsi (gsi, m1,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ asgn = gimple_build_assign (mvar, t);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+
+ /* m1var = mvar */
+ t = force_gimple_operand_gsi (gsi, mvar,
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ asgn = gimple_build_assign (m1var, t);
+ gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
+
+ w *= 2;
+ i *= 2;
+ }
+
+ if (fntype != NULL_TREE)
+ {
+ tree v, m, r, ctype;
+ gimple c, stmt, asgn;
+
+ ctype = build_vector_type (char_type_node, 16);
+ r = create_tmp_var (ctype, "res");
+
+ v = force_gimple_operand_gsi (gsi,
+ fold_build1 (VIEW_CONVERT_EXPR, ctype, vec0),
+ true, NULL_TREE, true, GSI_SAME_STMT);
+
+ m = force_gimple_operand_gsi (gsi,
+ fold_build1 (VIEW_CONVERT_EXPR, ctype, mvar),
+ true, NULL_TREE, true, GSI_SAME_STMT);
+
+ c = gimple_build_call (fntype, 2, v, m);
+ gimple_call_set_lhs (c, r);
+ gsi_insert_before (gsi, c, GSI_SAME_STMT);
+
+ stmt = gsi_stmt (*gsi);
+ t = force_gimple_operand_gsi (gsi,
+ build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vec0), r),
+ true, NULL_TREE, true, GSI_SAME_STMT);
+ asgn = gimple_build_assign (gimple_call_lhs (stmt), t);
+ gsi_replace (gsi, asgn, false);
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Lower shuffling vectors VEC0 and VEC1 as specified by MASK.
+ Replaces the statement at *GSI with a target specific sequence
+ implementing the shuffle operation and returns true. Returns
+ false if no target specific sequence for this shuffle operation
+ exists. */
+static bool
+ix86_vectorize_builtin_shuffle2 (gimple_stmt_iterator *gsi,
+ tree vec0, tree vec1, tree mask)
+{
+ if (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (vec0))))
+ return false;
+
+ /* Recursively grab the definition of the variable. */
+ while (TREE_CODE (mask) == SSA_NAME)
+ {
+ gimple maskdef = SSA_NAME_DEF_STMT (mask);
+ if (gimple_assign_single_p (maskdef))
+ mask = gimple_assign_rhs1 (maskdef);
+ else
+ break;
+ }
+
+ if (TREE_CODE (mask) == VECTOR_CST)
+ {
+ tree t, m_type;
+ if (!ix86_vectorize_builtin_vec_perm_ok (TREE_TYPE (vec0), mask))
+ return false;
+
+ t = ix86_vectorize_builtin_vec_perm (TREE_TYPE (vec0), &m_type);
+
+ if (t != NULL_TREE)
+ {
+ gimple c = gimple_build_call (t, 3, vec0, vec1, mask);
+ gimple stmt = gsi_stmt (*gsi);
+ gimple_call_set_lhs (c, gimple_call_lhs (stmt));
+ gsi_replace (gsi, c, false);
+ return true;
+ }
+ }
+
+ return false;
+}
+
/* Return a vector mode with twice as many elements as VMODE. */
/* ??? Consider moving this to a table generated by genmodes.c. */
@@ -31139,13 +31319,21 @@ extract_vec_perm_cst (struct expand_vec_
unsigned i, nelt = d->nelt;
int ret = 0;
- for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
+ for (i = 0; i < nelt; ++i, list =
+ (list == NULL_TREE ? NULL_TREE : TREE_CHAIN (list)))
{
unsigned HOST_WIDE_INT e;
+ tree value;
+
+ if (list != NULL_TREE)
+ value = TREE_VALUE (list);
+ else
+ value = fold_convert (TREE_TYPE (TREE_TYPE (cst)),
+ integer_zero_node);
- if (!host_integerp (TREE_VALUE (list), 1))
+ if (!host_integerp (value, 1))
return 0;
- e = tree_low_cst (TREE_VALUE (list), 1);
+ e = tree_low_cst (value, 1);
if (e >= 2 * nelt)
return 0;
@@ -31294,10 +31482,10 @@ ix86_vectorize_builtin_vec_perm_ok (tree
vec_mask = extract_vec_perm_cst (&d, mask);
- /* This hook is cannot be called in response to something that the
- user does (unlike the builtin expander) so we shouldn't ever see
- an error generated from the extract. */
- gcc_assert (vec_mask > 0 && vec_mask <= 3);
+ /* Check whether the mask can be applied to the vector type. */
+ if (vec_mask < 0 || vec_mask > 3)
+ return false;
+
one_vec = (vec_mask != 3);
/* Implementable with shufps or pshufd. */
@@ -31715,6 +31903,14 @@ ix86_enum_va_list (int idx, const char *
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
ix86_vectorize_builtin_vec_perm_ok
+#undef TARGET_VECTORIZE_BUILTIN_SHUFFLE
+#define TARGET_VECTORIZE_BUILTIN_SHUFFLE \
+ ix86_vectorize_builtin_shuffle
+
+#undef TARGET_VECTORIZE_BUILTIN_SHUFFLE2
+#define TARGET_VECTORIZE_BUILTIN_SHUFFLE2 \
+ ix86_vectorize_builtin_shuffle2
+
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function