@@ -43665,39 +43665,169 @@ ix86_memmodel_check (unsigned HOST_WIDE_
return val;
}
-/* Return the default mangling character when no vector size can be
- determined from the `processor' clause. */
-
-static char
-ix86_cilkplus_default_vecsize_mangle (struct cgraph_node *clone
- ATTRIBUTE_UNUSED)
+/* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
+ CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
+ CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
+ or number of vecsize_mangle variants that should be emitted. */
+
+static int
+ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
+ struct cgraph_simd_clone *clonei,
+ tree base_type, int num)
{
- return 'x';
+ int ret = 1;
+
+ if (clonei->simdlen
+ && (clonei->simdlen < 2
+ || clonei->simdlen > 16
+ || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
+ {
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %d\n", clonei->simdlen);
+ return 0;
+ }
+
+ tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
+ if (TREE_CODE (ret_type) != VOID_TYPE)
+ switch (TYPE_MODE (ret_type))
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case SFmode:
+ case DFmode:
+ /* case SCmode: */
+ /* case DCmode: */
+ break;
+ default:
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported return type %qT for simd\n", ret_type);
+ return 0;
+ }
+
+ tree t;
+ int i;
+
+ for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
+ /* FIXME: Shouldn't we allow such arguments if they are uniform? */
+ switch (TYPE_MODE (TREE_TYPE (t)))
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case SFmode:
+ case DFmode:
+ /* case SCmode: */
+ /* case DCmode: */
+ break;
+ default:
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported argument type %qT for simd\n", TREE_TYPE (t));
+ return 0;
+ }
+
+ if (clonei->cilk_elemental)
+ {
+ /* Parse here processor clause. If not present, default to 'b'. */
+ clonei->vecsize_mangle = 'b';
+ }
+ else
+ {
+ clonei->vecsize_mangle = "bcd"[num];
+ ret = 3;
+ }
+ switch (clonei->vecsize_mangle)
+ {
+ case 'b':
+ clonei->vecsize_int = 128;
+ clonei->vecsize_float = 128;
+ break;
+ case 'c':
+ clonei->vecsize_int = 128;
+ clonei->vecsize_float = 256;
+ break;
+ case 'd':
+ clonei->vecsize_int = 256;
+ clonei->vecsize_float = 256;
+ break;
+ }
+ if (clonei->simdlen == 0)
+ {
+ if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
+ clonei->simdlen = clonei->vecsize_int;
+ else
+ clonei->simdlen = clonei->vecsize_float;
+ clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
+ if (clonei->simdlen > 16)
+ clonei->simdlen = 16;
+ }
+ return ret;
}
-/* Return the hardware vector size (in bits) for a mangling
- character. */
+/* Add target attribute to SIMD clone NODE if needed. */
-static unsigned int
-ix86_cilkplus_vecsize_for_mangle (char mangle)
+static void
+ix86_simd_clone_adjust (struct cgraph_node *node)
{
- /* ?? Intel currently has no ISA encoding character for AVX-512. */
- switch (mangle)
+ const char *str = NULL;
+ gcc_assert (node->decl == cfun->decl);
+ switch (node->simdclone->vecsize_mangle)
{
- case 'x':
- /* xmm (SSE2). */
- return 128;
- case 'y':
- /* ymm1 (AVX1). */
- case 'Y':
- /* ymm2 (AVX2). */
- return 256;
- case 'z':
- /* zmm (MIC). */
- return 512;
+ case 'b':
+ if (!TARGET_SSE2)
+ str = "sse2";
+ break;
+ case 'c':
+ if (!TARGET_AVX)
+ str = "avx";
+ break;
+ case 'd':
+ if (!TARGET_AVX2)
+ str = "avx2";
+ break;
default:
gcc_unreachable ();
+ }
+ if (str == NULL)
+ return;
+ push_cfun (NULL);
+ tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
+ bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
+ gcc_assert (ok);
+ pop_cfun ();
+ ix86_previous_fndecl = NULL_TREE;
+ ix86_set_current_function (node->decl);
+}
+
+/* If SIMD clone NODE can't be used in a vectorized loop
+ in current function, return -1, otherwise return a badness of using it
+ (0 if it is most desirable from vecsize_mangle point of view, 1
+ slightly less desirable, etc.). */
+
+static int
+ix86_simd_clone_usable (struct cgraph_node *node)
+{
+ switch (node->simdclone->vecsize_mangle)
+ {
+ case 'b':
+ if (!TARGET_SSE2)
+ return -1;
+ if (!TARGET_AVX)
+ return 0;
+ return TARGET_AVX2 ? 2 : 1;
+ case 'c':
+ if (!TARGET_AVX)
+ return -1;
+ return TARGET_AVX2 ? 1 : 0;
+ break;
+ case 'd':
+ if (!TARGET_AVX2)
+ return -1;
return 0;
+ default:
+ gcc_unreachable ();
}
}
@@ -44189,13 +44319,17 @@ ix86_atomic_assign_expand_fenv (tree *ho
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
-#undef TARGET_CILKPLUS_DEFAULT_VECSIZE_MANGLE
-#define TARGET_CILKPLUS_DEFAULT_VECSIZE_MANGLE \
- ix86_cilkplus_default_vecsize_mangle
-
-#undef TARGET_CILKPLUS_VECSIZE_FOR_MANGLE
-#define TARGET_CILKPLUS_VECSIZE_FOR_MANGLE \
- ix86_cilkplus_vecsize_for_mangle
+#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
+#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
+ ix86_simd_clone_compute_vecsize_and_simdlen
+
+#undef TARGET_SIMD_CLONE_ADJUST
+#define TARGET_SIMD_CLONE_ADJUST \
+ ix86_simd_clone_adjust
+
+#undef TARGET_SIMD_CLONE_USABLE
+#define TARGET_SIMD_CLONE_USABLE \
+ ix86_simd_clone_usable
#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
@@ -11245,68 +11245,65 @@ make_pass_diagnose_omp_blocks (gcc::cont
/* SIMD clone supporting code. */
-/* A map for function arguments. This will map a zero-based integer
- to the corresponding index into DECL_ARGUMENTS. */
-class argno_map
-{
- vec<tree> tree_args;
- public:
- /* Default constructor declared but not implemented by design. The
- only valid constructor is the TREE version below. */
- argno_map ();
- argno_map (tree fndecl);
-
- ~argno_map () { tree_args.release (); }
- unsigned int length () { return tree_args.length (); }
- tree operator[] (unsigned n) { return tree_args[n]; }
-};
-
-/* FNDECL is the function containing the arguments. */
-
-argno_map::argno_map (tree fndecl)
-{
- tree_args.create (5);
- for (tree t = DECL_ARGUMENTS (fndecl); t; t = DECL_CHAIN (t))
- tree_args.safe_push (t);
-}
-
/* Allocate a fresh `simd_clone' and return it. NARGS is the number
of arguments to reserve space for. */
-static struct simd_clone *
+static struct cgraph_simd_clone *
simd_clone_struct_alloc (int nargs)
{
- struct simd_clone *clone_info;
- size_t len = (sizeof (struct simd_clone)
- + nargs * sizeof (struct simd_clone_arg));
- clone_info = ggc_alloc_cleared_simd_clone_stat (len PASS_MEM_STAT);
+ struct cgraph_simd_clone *clone_info;
+ size_t len = (sizeof (struct cgraph_simd_clone)
+ + nargs * sizeof (struct cgraph_simd_clone_arg));
+ clone_info = (struct cgraph_simd_clone *)
+ ggc_internal_cleared_alloc_stat (len PASS_MEM_STAT);
return clone_info;
}
-/* Make a copy of the `struct simd_clone' in FROM to TO. */
+/* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */
static inline void
-simd_clone_struct_copy (struct simd_clone *to, struct simd_clone *from)
+simd_clone_struct_copy (struct cgraph_simd_clone *to,
+ struct cgraph_simd_clone *from)
{
- memcpy (to, from, (sizeof (struct simd_clone)
- + from->nargs * sizeof (struct simd_clone_arg)));
+ memcpy (to, from, (sizeof (struct cgraph_simd_clone)
+ + from->nargs * sizeof (struct cgraph_simd_clone_arg)));
}
-/* Given a simd clone in NEW_NODE, extract the simd specific
- information from the OMP clauses passed in CLAUSES, and set the
- relevant bits in the cgraph node. *INBRANCH_SPECIFIED is set to
- TRUE if the `inbranch' or `notinbranch' clause specified, otherwise
- set to FALSE. */
+/* Return vector of parameter types of function FNDECL. This uses
+ TYPE_ARG_TYPES if available, otherwise falls back to types of
+ DECL_ARGUMENTS types. */
+
+vec<tree>
+simd_clone_vector_of_formal_parm_types (tree fndecl)
+{
+ if (TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
+ return ipa_get_vector_of_formal_parm_types (TREE_TYPE (fndecl));
+ vec<tree> args = ipa_get_vector_of_formal_parms (fndecl);
+ unsigned int i;
+ tree arg;
+ FOR_EACH_VEC_ELT (args, i, arg)
+ args[i] = TREE_TYPE (args[i]);
+ return args;
+}
-static void
-simd_clone_clauses_extract (struct cgraph_node *new_node, tree clauses,
+/* Given a simd function in NODE, extract the simd specific
+ information from the OMP clauses passed in CLAUSES, and return
+ the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED
+ is set to TRUE if the `inbranch' or `notinbranch' clause specified,
+ otherwise set to FALSE. */
+
+static struct cgraph_simd_clone *
+simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
bool *inbranch_specified)
{
+ vec<tree> args = simd_clone_vector_of_formal_parm_types (node->decl);
tree t;
- int n = 0;
+ int n;
*inbranch_specified = false;
- for (t = DECL_ARGUMENTS (new_node->decl); t; t = DECL_CHAIN (t))
- ++n;
+
+ n = args.length ();
+ if (n > 0 && args.last () == void_type_node)
+ n--;
/* To distinguish from an OpenMP simd clone, Cilk Plus functions to
be cloned have a distinctive artificial label in addition to "omp
@@ -11314,21 +11311,22 @@ simd_clone_clauses_extract (struct cgrap
bool cilk_clone
= (flag_enable_cilkplus
&& lookup_attribute ("cilk plus elemental",
- DECL_ATTRIBUTES (new_node->decl)));
+ DECL_ATTRIBUTES (node->decl)));
/* Allocate one more than needed just in case this is an in-branch
clone which will require a mask argument. */
- struct simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
+ struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
clone_info->nargs = n;
clone_info->cilk_elemental = cilk_clone;
- gcc_assert (!new_node->simdclone);
- new_node->simdclone = clone_info;
if (!clauses)
- return;
+ {
+ args.release ();
+ return clone_info;
+ }
clauses = TREE_VALUE (clauses);
if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
- return;
+ return clone_info;
for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
{
@@ -11361,12 +11359,22 @@ simd_clone_clauses_extract (struct cgrap
}
else
{
+ if (POINTER_TYPE_P (args[argno]))
+ step = fold_convert (ssizetype, step);
if (!tree_fits_shwi_p (step))
- warning_at (OMP_CLAUSE_LOCATION (t), 0,
- "ignoring large linear step");
+ {
+ warning_at (OMP_CLAUSE_LOCATION (t), 0,
+ "ignoring large linear step");
+ args.release ();
+ return NULL;
+ }
else if (integer_zerop (step))
- warning_at (OMP_CLAUSE_LOCATION (t), 0,
- "ignoring zero linear step");
+ {
+ warning_at (OMP_CLAUSE_LOCATION (t), 0,
+ "ignoring zero linear step");
+ args.release ();
+ return NULL;
+ }
else
{
clone_info->args[argno].arg_type
@@ -11396,39 +11404,20 @@ simd_clone_clauses_extract (struct cgrap
break;
}
}
+ args.release ();
+ return clone_info;
}
-/* Helper function for mangling vectors. Given a vector size in bits,
- return the corresponding mangling character. */
-
-static char
-vecsize_mangle (unsigned int vecsize)
-{
- switch (vecsize)
- {
- /* The Intel Vector ABI does not provide a mangling character
- for a 64-bit ISA, but this feels like it's keeping with the
- design. */
- case 64: return 'w';
-
- case 128: return 'x';
- case 256: return 'y';
- case 512: return 'z';
- default:
- /* FIXME: We must come up with a default mangling bit. */
- return 'x';
- }
-}
-
-/* Given a SIMD clone in NEW_NODE, calculate the characteristic data
+/* Given a SIMD clone in NODE, calculate the characteristic data
type and return the coresponding type. The characteristic data
type is computed as described in the Intel Vector ABI. */
static tree
-simd_clone_compute_base_data_type (struct cgraph_node *new_node)
+simd_clone_compute_base_data_type (struct cgraph_node *node,
+ struct cgraph_simd_clone *clone_info)
{
tree type = integer_type_node;
- tree fndecl = new_node->decl;
+ tree fndecl = node->decl;
/* a) For non-void function, the characteristic data type is the
return type. */
@@ -11440,14 +11429,14 @@ simd_clone_compute_base_data_type (struc
such parameter. */
else
{
- argno_map map (fndecl);
- for (unsigned int i = 0; i < new_node->simdclone->nargs; ++i)
- if (new_node->simdclone->args[i].arg_type
- == SIMD_CLONE_ARG_TYPE_VECTOR)
+ vec<tree> map = simd_clone_vector_of_formal_parm_types (fndecl);
+ for (unsigned int i = 0; i < clone_info->nargs; ++i)
+ if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
{
- type = TREE_TYPE (map[i]);
+ type = map[i];
break;
}
+ map.release ();
}
/* c) If the characteristic data type determined by a) or b) above
@@ -11470,55 +11459,13 @@ simd_clone_compute_base_data_type (struc
/* Well, we don't handle Xeon Phi yet. */
}
-/* Given a SIMD clone in NEW_NODE, compute simdlen and vector size,
- and store them in NEW_NODE->simdclone. */
-
-static void
-simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *new_node)
-{
- char vmangle = new_node->simdclone->vecsize_mangle;
- /* Vector size for this clone. */
- unsigned int vecsize = 0;
- /* Base vector type, based on function arguments. */
- tree base_type = simd_clone_compute_base_data_type (new_node);
- unsigned int base_type_size = GET_MODE_BITSIZE (TYPE_MODE (base_type));
-
- /* Calculate everything for Cilk Plus clones with appropriate target
- support. This is as specified in the Intel Vector ABI.
-
- Note: Any target which supports the Cilk Plus processor clause
- must also provide appropriate target hooks for calculating
- default ISA/processor (default_vecsize_mangle), and for
- calculating hardware vector size based on ISA/processor
- (vecsize_for_mangle). */
- if (new_node->simdclone->cilk_elemental
- && targetm.cilkplus.default_vecsize_mangle)
- {
- if (!vmangle)
- vmangle = targetm.cilkplus.default_vecsize_mangle (new_node);
- vecsize = targetm.cilkplus.vecsize_for_mangle (vmangle);
- if (!new_node->simdclone->simdlen)
- new_node->simdclone->simdlen = vecsize / base_type_size;
- }
- /* Calculate everything else generically. */
- else
- {
- vecsize = GET_MODE_BITSIZE (targetm.vectorize.preferred_simd_mode
- (TYPE_MODE (base_type)));
- vmangle = vecsize_mangle (vecsize);
- if (!new_node->simdclone->simdlen)
- new_node->simdclone->simdlen = vecsize / base_type_size;
- }
- new_node->simdclone->vecsize_mangle = vmangle;
- new_node->simdclone->hw_vector_size = vecsize;
-}
-
-static void
-simd_clone_mangle (struct cgraph_node *old_node, struct cgraph_node *new_node)
+static tree
+simd_clone_mangle (struct cgraph_node *node,
+ struct cgraph_simd_clone *clone_info)
{
- char vecsize_mangle = new_node->simdclone->vecsize_mangle;
- char mask = new_node->simdclone->inbranch ? 'M' : 'N';
- unsigned int simdlen = new_node->simdclone->simdlen;
+ char vecsize_mangle = clone_info->vecsize_mangle;
+ char mask = clone_info->inbranch ? 'M' : 'N';
+ unsigned int simdlen = clone_info->simdlen;
unsigned int n;
pretty_printer pp;
@@ -11529,9 +11476,9 @@ simd_clone_mangle (struct cgraph_node *o
pp_character (&pp, mask);
pp_decimal_int (&pp, simdlen);
- for (n = 0; n < new_node->simdclone->nargs; ++n)
+ for (n = 0; n < clone_info->nargs; ++n)
{
- struct simd_clone_arg arg = new_node->simdclone->args[n];
+ struct cgraph_simd_clone_arg arg = clone_info->args[n];
if (arg.arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
pp_character (&pp, 'u');
@@ -11564,10 +11511,22 @@ simd_clone_mangle (struct cgraph_node *o
pp_underscore (&pp);
pp_string (&pp,
- IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (old_node->decl)));
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
const char *str = pp_formatted_text (&pp);
- change_decl_assembler_name (new_node->decl,
- get_identifier (str));
+
+ /* If there already is a SIMD clone with the same mangled name, don't
+ add another one. This can happen e.g. for
+ #pragma omp declare simd
+ #pragma omp declare simd simdlen(8)
+ int foo (int, int);
+ if the simdlen is assumed to be 8 for the first one, etc. */
+ for (struct cgraph_node *clone = node->simd_clones; clone;
+ clone = clone->simdclone->next_clone)
+ if (strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (clone->decl)),
+ str) == 0)
+ return NULL_TREE;
+
+ return get_identifier (str);
}
/* Create a simd clone of OLD_NODE and return it. */
@@ -11576,18 +11535,28 @@ static struct cgraph_node *
simd_clone_create (struct cgraph_node *old_node)
{
struct cgraph_node *new_node;
- new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL, false,
- NULL, NULL, "simdclone");
-
- new_node->simdclone_of = old_node;
+ if (old_node->definition)
+ new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL, false,
+ NULL, NULL, "simdclone");
+ else
+ {
+ tree old_decl = old_node->decl;
+ tree new_decl = copy_node (old_node->decl);
+ DECL_NAME (new_decl) = clone_function_name (old_decl, "simdclone");
+ SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
+ SET_DECL_RTL (new_decl, NULL);
+ DECL_STATIC_CONSTRUCTOR (new_decl) = 0;
+ DECL_STATIC_DESTRUCTOR (new_decl) = 0;
+ new_node
+ = cgraph_copy_node_for_versioning (old_node, new_decl, vNULL, NULL);
+ cgraph_call_function_insertion_hooks (new_node);
+ }
+ if (new_node == NULL)
+ return new_node;
- /* Keep cgraph friends from removing the clone. */
- new_node->externally_visible
- = old_node->externally_visible;
TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
- old_node->has_simd_clones = true;
- /* The function cgraph_function_versioning() will force the new
+ /* The function cgraph_function_versioning () will force the new
symbol local. Undo this, and inherit external visability from
the old node. */
new_node->local.local = old_node->local.local;
@@ -11605,39 +11574,48 @@ simd_clone_adjust_return_type (struct cg
{
tree fndecl = node->decl;
tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl));
+ unsigned int veclen;
+ tree t;
- tree t = DECL_RESULT (fndecl);
- /* Adjust the DECL_RESULT. */
- if (TREE_TYPE (t) != void_type_node)
- {
- TREE_TYPE (t)
- = build_vector_type (TREE_TYPE (t), node->simdclone->simdlen);
- DECL_MODE (t) = TYPE_MODE (TREE_TYPE (t));
- }
/* Adjust the function return type. */
- if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
+ if (orig_rettype == void_type_node)
+ return NULL_TREE;
+ TREE_TYPE (fndecl) = build_distinct_type_copy (TREE_TYPE (fndecl));
+ if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (fndecl)))
+ || POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (fndecl))))
+ veclen = node->simdclone->vecsize_int;
+ else
+ veclen = node->simdclone->vecsize_float;
+ veclen /= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))));
+ if (veclen > node->simdclone->simdlen)
+ veclen = node->simdclone->simdlen;
+ if (veclen == node->simdclone->simdlen)
+ TREE_TYPE (TREE_TYPE (fndecl))
+ = build_vector_type (TREE_TYPE (TREE_TYPE (fndecl)),
+ node->simdclone->simdlen);
+ else
{
- TREE_TYPE (fndecl)
- = copy_node (TREE_TYPE (fndecl));
- TREE_TYPE (TREE_TYPE (fndecl))
- = copy_node (TREE_TYPE (TREE_TYPE (fndecl)));
- TREE_TYPE (TREE_TYPE (fndecl))
- = build_vector_type (TREE_TYPE (TREE_TYPE (fndecl)),
- node->simdclone->simdlen);
+ t = build_vector_type (TREE_TYPE (TREE_TYPE (fndecl)), veclen);
+ t = build_array_type_nelts (t, node->simdclone->simdlen / veclen);
+ TREE_TYPE (TREE_TYPE (fndecl)) = t;
}
+ if (!node->definition)
+ return NULL_TREE;
+
+ t = DECL_RESULT (fndecl);
+ /* Adjust the DECL_RESULT. */
+ gcc_assert (TREE_TYPE (t) != void_type_node);
+ TREE_TYPE (t) = TREE_TYPE (TREE_TYPE (fndecl));
+ relayout_decl (t);
+
+ tree atype = build_array_type_nelts (orig_rettype,
+ node->simdclone->simdlen);
+ if (veclen != node->simdclone->simdlen)
+ return build1 (VIEW_CONVERT_EXPR, atype, t);
/* Set up a SIMD array to use as the return value. */
- tree retval;
- if (orig_rettype != void_type_node)
- {
- retval
- = create_tmp_var_raw (build_array_type_nelts (orig_rettype,
- node->simdclone->simdlen),
- "retval");
- gimple_add_tmp_var (retval);
- }
- else
- retval = NULL;
+ tree retval = create_tmp_var_raw (atype, "retval");
+ gimple_add_tmp_var (retval);
return retval;
}
@@ -11673,21 +11651,26 @@ create_tmp_simd_array (const char *prefi
static ipa_parm_adjustment_vec
simd_clone_adjust_argument_types (struct cgraph_node *node)
{
- argno_map args (node->decl);
+ vec<tree> args;
ipa_parm_adjustment_vec adjustments;
+ if (node->definition)
+ args = ipa_get_vector_of_formal_parms (node->decl);
+ else
+ args = simd_clone_vector_of_formal_parm_types (node->decl);
adjustments.create (args.length ());
- unsigned i;
+ unsigned i, j, veclen;
+ struct ipa_parm_adjustment adj;
for (i = 0; i < node->simdclone->nargs; ++i)
{
- struct ipa_parm_adjustment adj;
-
memset (&adj, 0, sizeof (adj));
tree parm = args[i];
+ tree parm_type = node->definition ? TREE_TYPE (parm) : parm;
adj.base_index = i;
adj.base = parm;
- node->simdclone->args[i].orig_arg = parm;
+ node->simdclone->args[i].orig_arg = node->definition ? parm : NULL_TREE;
+ node->simdclone->args[i].orig_type = parm_type;
if (node->simdclone->args[i].arg_type != SIMD_CLONE_ARG_TYPE_VECTOR)
{
@@ -11696,43 +11679,119 @@ simd_clone_adjust_argument_types (struct
}
else
{
- adj.simdlen = node->simdclone->simdlen;
- if (POINTER_TYPE_P (TREE_TYPE (parm)))
+ if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type))
+ veclen = node->simdclone->vecsize_int;
+ else
+ veclen = node->simdclone->vecsize_float;
+ veclen /= GET_MODE_BITSIZE (TYPE_MODE (parm_type));
+ if (veclen > node->simdclone->simdlen)
+ veclen = node->simdclone->simdlen;
+ adj.simdlen = veclen;
+ adj.arg_prefix = "simd";
+ if (POINTER_TYPE_P (parm_type))
adj.by_ref = 1;
- adj.type = TREE_TYPE (parm);
+ adj.type = parm_type;
+ node->simdclone->args[i].vector_type
+ = build_vector_type (parm_type, veclen);
+ for (j = veclen; j < node->simdclone->simdlen; j += veclen)
+ {
+ adjustments.safe_push (adj);
+ if (j == veclen)
+ {
+ memset (&adj, 0, sizeof (adj));
+ adj.op = IPA_PARM_OP_NEW;
+ adj.arg_prefix = "simd";
+ adj.base_index = i;
+ adj.type = node->simdclone->args[i].vector_type;
+ }
+ }
- node->simdclone->args[i].simd_array
- = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
- TREE_TYPE (parm),
- node->simdclone->simdlen);
+ if (node->definition)
+ node->simdclone->args[i].simd_array
+ = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
+ parm_type, node->simdclone->simdlen);
}
- adj.arg_prefix = "simd";
- adjustments.quick_push (adj);
+ adjustments.safe_push (adj);
}
if (node->simdclone->inbranch)
{
- struct ipa_parm_adjustment adj;
+ tree base_type
+ = simd_clone_compute_base_data_type (node->simdclone->origin,
+ node->simdclone);
memset (&adj, 0, sizeof (adj));
adj.op = IPA_PARM_OP_NEW;
adj.arg_prefix = "mask";
+
adj.base_index = i;
- adj.type
- = build_vector_type (integer_type_node, node->simdclone->simdlen);
+ if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
+ veclen = node->simdclone->vecsize_int;
+ else
+ veclen = node->simdclone->vecsize_float;
+ veclen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
+ if (veclen > node->simdclone->simdlen)
+ veclen = node->simdclone->simdlen;
+ adj.type = build_vector_type (base_type, veclen);
adjustments.safe_push (adj);
+ for (j = veclen; j < node->simdclone->simdlen; j += veclen)
+ adjustments.safe_push (adj);
+
/* We have previously allocated one extra entry for the mask. Use
it and fill it. */
- struct simd_clone *sc = node->simdclone;
+ struct cgraph_simd_clone *sc = node->simdclone;
sc->nargs++;
- sc->args[i].orig_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL,
- integer_type_node);
- sc->args[i].simd_array
- = create_tmp_simd_array ("mask", integer_type_node, sc->simdlen);
+ if (node->definition)
+ {
+ sc->args[i].orig_arg
+ = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type);
+ sc->args[i].simd_array
+ = create_tmp_simd_array ("mask", base_type, sc->simdlen);
+ }
+ sc->args[i].orig_type = base_type;
+ sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
}
- ipa_modify_formal_parameters (node->decl, adjustments);
+ if (node->definition)
+ ipa_modify_formal_parameters (node->decl, adjustments);
+ else
+ {
+ tree new_arg_types = NULL_TREE, new_reversed;
+ bool last_parm_void = false;
+ if (args.length () > 0 && args.last () == void_type_node)
+ last_parm_void = true;
+
+ gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl)));
+ j = adjustments.length ();
+ for (i = 0; i < j; i++)
+ {
+ struct ipa_parm_adjustment *adj = &adjustments[i];
+ tree ptype;
+ if (adj->op == IPA_PARM_OP_COPY)
+ ptype = args[adj->base_index];
+ else if (adj->simdlen)
+ ptype = build_vector_type (adj->type, adj->simdlen);
+ else
+ ptype = adj->type;
+ new_arg_types = tree_cons (NULL_TREE, ptype, new_arg_types);
+ }
+ new_reversed = nreverse (new_arg_types);
+ if (last_parm_void)
+ {
+ if (new_reversed)
+ TREE_CHAIN (new_arg_types) = void_list_node;
+ else
+ new_reversed = void_list_node;
+ }
+
+ tree new_type = build_distinct_type_copy (TREE_TYPE (node->decl));
+ TYPE_ARG_TYPES (new_type) = new_reversed;
+ TREE_TYPE (node->decl) = new_type;
+
+ adjustments.release ();
+ }
+ args.release ();
return adjustments;
}
@@ -11745,21 +11804,50 @@ simd_clone_init_simd_arrays (struct cgra
ipa_parm_adjustment_vec adjustments)
{
gimple_seq seq = NULL;
- unsigned i = 0;
+ unsigned i = 0, j = 0, k;
for (tree arg = DECL_ARGUMENTS (node->decl);
arg;
- arg = DECL_CHAIN (arg), i++)
+ arg = DECL_CHAIN (arg), i++, j++)
{
- if (adjustments[i].op == IPA_PARM_OP_COPY)
+ if (adjustments[j].op == IPA_PARM_OP_COPY)
continue;
node->simdclone->args[i].vector_arg = arg;
tree array = node->simdclone->args[i].simd_array;
- tree t = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (array), arg);
- t = build2 (MODIFY_EXPR, TREE_TYPE (array), array, t);
- gimplify_and_add (t, &seq);
+ if ((unsigned) adjustments[j].simdlen == node->simdclone->simdlen)
+ {
+ tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
+ tree ptr = build_fold_addr_expr (array);
+ tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
+ build_int_cst (ptype, 0));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
+ gimplify_and_add (t, &seq);
+ }
+ else
+ {
+ unsigned int simdlen = adjustments[j].simdlen;
+ if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
+ simdlen = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg));
+ tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
+ for (k = 0; k < node->simdclone->simdlen; k += simdlen)
+ {
+ tree ptr = build_fold_addr_expr (array);
+ int elemsize;
+ if (k)
+ {
+ arg = DECL_CHAIN (arg);
+ j++;
+ }
+ elemsize
+ = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (arg))));
+ tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
+ build_int_cst (ptype, k * elemsize));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
+ gimplify_and_add (t, &seq);
+ }
+ }
}
return seq;
}
@@ -11850,22 +11938,26 @@ ipa_simd_modify_function_body (struct cg
tree retval_array, tree iter)
{
basic_block bb;
+ unsigned int i, j;
/* Re-use the adjustments array, but this time use it to replace
every function argument use to an offset into the corresponding
simd_array. */
- for (unsigned i = 0; i < node->simdclone->nargs; ++i)
+ for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j)
{
if (!node->simdclone->args[i].vector_arg)
continue;
tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg);
- adjustments[i].new_decl
+ adjustments[j].new_decl
= build4 (ARRAY_REF,
basetype,
node->simdclone->args[i].simd_array,
iter,
NULL_TREE, NULL_TREE);
+ if (adjustments[j].op == IPA_PARM_OP_NONE
+ && (unsigned) adjustments[j].simdlen < node->simdclone->simdlen)
+ j += node->simdclone->simdlen / adjustments[j].simdlen - 1;
}
struct modify_stmt_info info;
@@ -11885,32 +11977,23 @@ ipa_simd_modify_function_body (struct cg
memset (&wi, 0, sizeof (wi));
info.modified = false;
wi.info = &info;
+ walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi);
- switch (gimple_code (stmt))
+ if (gimple_code (stmt) == GIMPLE_RETURN)
{
- case GIMPLE_RETURN:
- {
- tree retval = gimple_return_retval (stmt);
- if (!retval)
- {
- gsi_remove (&gsi, true);
- continue;
- }
-
- /* Replace `return foo' with `retval_array[iter] = foo'. */
- tree ref = build4 (ARRAY_REF,
- TREE_TYPE (retval),
- retval_array, iter,
- NULL, NULL);
- stmt = gimple_build_assign (ref, retval);
- gsi_replace (&gsi, stmt, true);
- info.modified = true;
- }
- break;
-
- default:
- walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi);
- break;
+ tree retval = gimple_return_retval (stmt);
+ if (!retval)
+ {
+ gsi_remove (&gsi, true);
+ continue;
+ }
+
+ /* Replace `return foo' with `retval_array[iter] = foo'. */
+ tree ref = build4 (ARRAY_REF, TREE_TYPE (retval),
+ retval_array, iter, NULL, NULL);
+ stmt = gimple_build_assign (ref, retval);
+ gsi_replace (&gsi, stmt, true);
+ info.modified = true;
}
if (info.modified)
@@ -11930,26 +12013,13 @@ ipa_simd_modify_function_body (struct cg
static void
simd_clone_adjust (struct cgraph_node *node)
{
- // FIXME: -------ABI STUFF--------
- // 0. Create clones for externs.
- // 1. Arguments split across multiple args.
- // 2. Which registers to pass in.
- // 3. Get mangling correct for x86*
- // 4. Agree on what default clones to generate when simdlen() missing.
-
- // FIXME: ------- VECTORIZER CHANGES -------
- // 1. At least the easy, notinbranch cases.
- // 2. Handle linear/uniform arguments in get_simd_clone/etc.
- // 3. Bail on non-SLP vectorizer mode.
-
- // FIXME: __attribute__((target (something))) if needed
-
- // FIXME: get_simd_clone() needs optimization.
-
push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+ targetm.simd_clone.adjust (node);
+
tree retval = simd_clone_adjust_return_type (node);
- ipa_parm_adjustment_vec adjustments = simd_clone_adjust_argument_types (node);
+ ipa_parm_adjustment_vec adjustments
+ = simd_clone_adjust_argument_types (node);
struct gimplify_ctx gctx;
push_gimplify_context (&gctx);
@@ -11959,16 +12029,11 @@ simd_clone_adjust (struct cgraph_node *n
/* Adjust all uses of vector arguments accordingly. Adjust all
return values accordingly. */
tree iter = create_tmp_var (unsigned_type_node, "iter");
- ipa_simd_modify_function_body (node, adjustments, retval, iter);
+ tree iter1 = make_ssa_name (iter, NULL);
+ tree iter2 = make_ssa_name (iter, NULL);
+ ipa_simd_modify_function_body (node, adjustments, retval, iter1);
/* Initialize the iteration variable. */
- gimple g
- = gimple_build_assign_with_ops (INTEGER_CST,
- iter,
- build_int_cst (unsigned_type_node, 0),
- NULL_TREE);
- gimple_seq_add_stmt (&seq, g);
-
basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR);
basic_block body_bb = split_block_after_labels (entry_bb)->dest;
gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
@@ -11994,8 +12059,9 @@ simd_clone_adjust (struct cgraph_node *n
edge e = make_edge (incr_bb, EXIT_BLOCK_PTR, 0);
e->probability = REG_BR_PROB_BASE;
gsi = gsi_last_bb (incr_bb);
- g = gimple_build_assign_with_ops (PLUS_EXPR, iter, iter,
- build_int_cst (unsigned_type_node, 1));
+ gimple g = gimple_build_assign_with_ops (PLUS_EXPR, iter2, iter1,
+ build_int_cst (unsigned_type_node,
+ 1));
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
/* Mostly annotate the loop for the vectorizer (the rest is done below). */
@@ -12012,15 +12078,24 @@ simd_clone_adjust (struct cgraph_node *n
gimple_stmt_iterator gsi = gsi_last_bb (loop->header);
tree mask_array
= node->simdclone->args[node->simdclone->nargs - 1].simd_array;
- tree mask = create_tmp_var (integer_type_node, NULL);
+ tree mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)), NULL);
tree aref = build4 (ARRAY_REF,
- integer_type_node,
- mask_array, iter,
+ TREE_TYPE (TREE_TYPE (mask_array)),
+ mask_array, iter1,
NULL, NULL);
g = gimple_build_assign (mask, aref);
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+ int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref)));
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
+ {
+ aref = build1 (VIEW_CONVERT_EXPR,
+ build_nonstandard_integer_type (bitsize, 0), mask);
+ mask = make_ssa_name (TREE_TYPE (aref), NULL);
+ g = gimple_build_assign (mask, aref);
+ gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+ }
- g = gimple_build_cond (EQ_EXPR, mask, integer_zero_node,
+ g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),
NULL, NULL);
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE);
@@ -12029,7 +12104,7 @@ simd_clone_adjust (struct cgraph_node *n
/* Generate the condition. */
g = gimple_build_cond (LT_EXPR,
- iter,
+ iter2,
build_int_cst (unsigned_type_node,
node->simdclone->simdlen),
NULL, NULL);
@@ -12048,9 +12123,20 @@ simd_clone_adjust (struct cgraph_node *n
make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */
FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE;
+ gimple phi = create_phi_node (iter1, body_bb);
+ edge preheader_edge = find_edge (entry_bb, body_bb);
+ edge latch_edge = single_succ_edge (latch_bb);
+ add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge,
+ UNKNOWN_LOCATION);
+ add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
+
/* Generate the new return. */
gsi = gsi_last_bb (new_exit_bb);
- if (retval)
+ if (retval
+ && TREE_CODE (retval) == VIEW_CONVERT_EXPR
+ && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL)
+ retval = TREE_OPERAND (retval, 0);
+ else if (retval)
{
retval = build1 (VIEW_CONVERT_EXPR,
TREE_TYPE (TREE_TYPE (node->decl)),
@@ -12061,6 +12147,99 @@ simd_clone_adjust (struct cgraph_node *n
g = gimple_build_return (retval);
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+ /* Handle aligned clauses by replacing default defs of the aligned
+ uniform args with __builtin_assume_aligned (arg_N(D), alignment)
+ lhs. Handle linear by adding PHIs. */
+ for (unsigned i = 0; i < node->simdclone->nargs; i++)
+ if (node->simdclone->args[i].alignment
+ && node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
+ && (node->simdclone->args[i].alignment
+ & (node->simdclone->args[i].alignment - 1)) == 0
+ && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
+ == POINTER_TYPE)
+ {
+ unsigned int alignment = node->simdclone->args[i].alignment;
+ tree orig_arg = node->simdclone->args[i].orig_arg;
+ tree def = ssa_default_def (cfun, orig_arg);
+ if (!has_zero_uses (def))
+ {
+ tree fn = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
+ gimple_seq seq = NULL;
+ bool need_cvt = false;
+ gimple call
+ = gimple_build_call (fn, 2, def, size_int (alignment));
+ g = call;
+ if (!useless_type_conversion_p (TREE_TYPE (orig_arg),
+ ptr_type_node))
+ need_cvt = true;
+ tree t = make_ssa_name (need_cvt ? ptr_type_node : orig_arg, NULL);
+ gimple_call_set_lhs (g, t);
+ gimple_seq_add_stmt_without_update (&seq, g);
+ if (need_cvt)
+ {
+ t = make_ssa_name (orig_arg, NULL);
+ g = gimple_build_assign_with_ops (NOP_EXPR, t,
+ gimple_call_lhs (g),
+ NULL_TREE);
+ gimple_seq_add_stmt_without_update (&seq, g);
+ }
+ gsi_insert_seq_on_edge_immediate
+ (single_succ_edge (ENTRY_BLOCK_PTR), seq);
+
+ entry_bb = single_succ (ENTRY_BLOCK_PTR);
+ int freq = compute_call_stmt_bb_frequency (current_function_decl,
+ entry_bb);
+ cgraph_create_edge (node, cgraph_get_create_node (fn),
+ call, entry_bb->count, freq);
+
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ gimple use_stmt;
+ tree repl = gimple_get_lhs (g);
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
+ if (is_gimple_debug (use_stmt) || use_stmt == call)
+ continue;
+ else
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ SET_USE (use_p, repl);
+ }
+ }
+ else if (node->simdclone->args[i].arg_type
+ == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
+ {
+ tree orig_arg = node->simdclone->args[i].orig_arg;
+ tree def = ssa_default_def (cfun, orig_arg);
+ gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
+ || POINTER_TYPE_P (TREE_TYPE (orig_arg)));
+ if (!has_zero_uses (def))
+ {
+ iter1 = make_ssa_name (orig_arg, NULL);
+ iter2 = make_ssa_name (orig_arg, NULL);
+ phi = create_phi_node (iter1, body_bb);
+ add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION);
+ add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
+ enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
+ ? PLUS_EXPR : POINTER_PLUS_EXPR;
+ tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
+ ? TREE_TYPE (orig_arg) : sizetype;
+ tree addcst
+ = build_int_cst (addtype, node->simdclone->args[i].linear_step);
+ g = gimple_build_assign_with_ops (code, iter2, iter1, addcst);
+ gsi = gsi_last_bb (incr_bb);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ gimple use_stmt;
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
+ if (use_stmt == phi)
+ continue;
+ else
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ SET_USE (use_p, iter1);
+ }
+ }
+
calculate_dominance_info (CDI_DOMINATORS);
add_loop (loop, loop->header->loop_father);
update_ssa (TODO_update_ssa);
@@ -12074,38 +12253,88 @@ simd_clone_adjust (struct cgraph_node *n
static void
expand_simd_clones (struct cgraph_node *node)
{
- if (cgraph_function_body_availability (node) < AVAIL_OVERWRITABLE)
+ if (lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
return;
tree attr = lookup_attribute ("omp declare simd",
DECL_ATTRIBUTES (node->decl));
- if (!attr)
+ if (!attr || targetm.simd_clone.compute_vecsize_and_simdlen == NULL)
+ return;
+ /* Ignore
+ #pragma omp declare simd
+ extern int foo ();
+ in C, there we don't know the argument types at all. */
+ if (!node->definition
+ && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE)
return;
do
{
- struct cgraph_node *new_node = simd_clone_create (node);
-
bool inbranch_clause_specified;
- simd_clone_clauses_extract (new_node, TREE_VALUE (attr),
- &inbranch_clause_specified);
- simd_clone_compute_vecsize_and_simdlen (new_node);
- simd_clone_mangle (node, new_node);
- simd_clone_adjust (new_node);
-
- /* If no inbranch clause was specified, we need both variants.
- We have already created the not-in-branch version above, by
- virtue of .inbranch being clear. Create the masked in-branch
- version. */
- if (!inbranch_clause_specified)
+ struct cgraph_simd_clone *clone_info
+ = simd_clone_clauses_extract (node, TREE_VALUE (attr),
+ &inbranch_clause_specified);
+ if (clone_info == NULL)
+ continue;
+
+ int orig_simdlen = clone_info->simdlen;
+ tree base_type = simd_clone_compute_base_data_type (node, clone_info);
+ int count
+ = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
+ base_type, 0);
+ if (count == 0)
+ continue;
+
+ for (int i = 0; i < count * 2; i++)
{
+ struct cgraph_simd_clone *clone = clone_info;
+ if (inbranch_clause_specified && (i & 1) != 0)
+ continue;
+
+ if (i != 0)
+ {
+ clone = simd_clone_struct_alloc (clone_info->nargs
+ - clone_info->inbranch
+ + ((i & 1) != 0));
+ simd_clone_struct_copy (clone, clone_info);
+ clone->nargs -= clone_info->inbranch;
+ clone->simdlen = orig_simdlen;
+ targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
+ base_type,
+ i / 2);
+ if ((i & 1) != 0)
+ clone->inbranch = 1;
+ }
+
+ tree id = simd_clone_mangle (node, clone);
+ if (id == NULL_TREE)
+ continue;
+
struct cgraph_node *n = simd_clone_create (node);
- struct simd_clone *clone
- = simd_clone_struct_alloc (new_node->simdclone->nargs);
- simd_clone_struct_copy (clone, new_node->simdclone);
- clone->inbranch = 1;
+ if (n == NULL)
+ continue;
+
n->simdclone = clone;
- simd_clone_mangle (node, n);
- simd_clone_adjust (n);
+ clone->origin = node;
+ clone->next_clone = NULL;
+ if (node->simd_clones == NULL)
+ {
+ clone->prev_clone = n;
+ node->simd_clones = n;
+ }
+ else
+ {
+ clone->prev_clone = node->simd_clones->simdclone->prev_clone;
+ clone->prev_clone->simdclone->next_clone = n;
+ node->simd_clones->simdclone->prev_clone = n;
+ }
+ change_decl_assembler_name (n->decl, id);
+ if (node->definition)
+ simd_clone_adjust (n);
+ else
+ {
+ simd_clone_adjust_return_type (n);
+ simd_clone_adjust_argument_types (n);
+ }
}
}
while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
@@ -12117,7 +12346,7 @@ static unsigned int
ipa_omp_simd_clone (void)
{
struct cgraph_node *node;
- FOR_EACH_DEFINED_FUNCTION (node)
+ FOR_EACH_FUNCTION (node)
expand_simd_clones (node);
return 0;
}
@@ -12147,7 +12376,8 @@ public:
{}
/* opt_pass methods: */
- bool gate () { return flag_openmp || flag_enable_cilkplus; }
+ bool gate () { return flag_openmp || flag_openmp_simd
+ || flag_enable_cilkplus; }
unsigned int execute () { return ipa_omp_simd_clone (); }
};
@@ -3641,8 +3641,9 @@ c_builtin_function_ext_scope (tree decl)
const char *name = IDENTIFIER_POINTER (id);
C_DECL_BUILTIN_PROTOTYPE (decl) = prototype_p (type);
- bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false,
- UNKNOWN_LOCATION);
+ if (external_scope)
+ bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false,
+ UNKNOWN_LOCATION);
/* Builtins in the implementation namespace are made visible without
needing to be explicitly declared. See push_file_scope. */
@@ -429,13 +429,15 @@ determine_versionability (struct cgraph_
reason = "not a tree_versionable_function";
else if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
reason = "insufficient body availability";
- else if (node->has_simd_clones)
+ else if (node->simd_clones != NULL)
{
/* Ideally we should clone the SIMD clones themselves and create
vector copies of them, so IPA-cp and SIMD clones can happily
coexist, but that may not be worth the effort. */
reason = "function has SIMD clones";
}
+ else if (node->simdclone != NULL)
+ reason = "function is SIMD clone";
if (reason && dump_file && !node->alias && !node->thunk.thunk_p)
fprintf (dump_file, "Function %s/%i is not versionable, reason: %s.\n",
@@ -702,6 +704,8 @@ initialize_node_lattices (struct cgraph_
else
disable = true;
}
+ else if (node->simdclone)
+ disable = true;
if (disable || variable)
{
@@ -3068,29 +3068,4 @@ gimple_check_call_matching_types (gimple
return true;
}
-/* Given a NODE, return a compatible SIMD clone returning `vectype'.
- If none found, NULL is returned. */
-
-struct cgraph_node *
-get_simd_clone (struct cgraph_node *node, tree vectype)
-{
- if (!node->has_simd_clones)
- return NULL;
-
- /* FIXME: What to do with linear/uniform arguments. */
-
- /* FIXME: Nasty kludge until we figure out where to put the clone
- list-- perhaps, next_sibling_clone/prev_sibling_clone in
- cgraph_node ??. */
- struct cgraph_node *t;
- FOR_EACH_FUNCTION (t)
- if (t->simdclone_of == node
- /* No inbranch vectorization for now. */
- && !t->simdclone->inbranch
- && types_compatible_p (TREE_TYPE (TREE_TYPE (t->decl)),
- vectype))
- break;
- return t;
-}
-
#include "gt-cgraph.h"
@@ -245,7 +245,7 @@ walk_polymorphic_call_targets (pointer_s
hope calls to them will be devirtualized.
Again we remove them after inlining. In late optimization some
- devirtualization may happen, but it is not importnat since we won't inline
+ devirtualization may happen, but it is not important since we won't inline
the call. In theory early opts and IPA should work out all important cases.
- virtual clones needs bodies of their origins for later materialization;
@@ -273,7 +273,7 @@ walk_polymorphic_call_targets (pointer_s
by reachable symbols or origins of clones). The queue is represented
as linked list by AUX pointer terminated by 1.
- A the end we keep all reachable symbols. For symbols in boundary we always
+ At the end we keep all reachable symbols. For symbols in boundary we always
turn definition into a declaration, but we may keep function body around
based on body_needed_for_clonning
@@ -425,6 +425,19 @@ symtab_remove_unreachable_nodes (bool be
enqueue_node (cnode, &first, reachable);
}
}
+
+ }
+ /* If any reachable function has simd clones, mark them as
+ reachable as well. */
+ if (cnode->simd_clones)
+ {
+ cgraph_node *next;
+ for (next = cnode->simd_clones;
+ next;
+ next = next->simdclone->next_clone)
+ if (in_boundary_p
+ || !pointer_set_insert (reachable, next))
+ enqueue_node (next, &first, reachable);
}
}
/* When we see constructor of external variable, keep referred nodes in the
@@ -5210,6 +5210,8 @@ finish_omp_clauses (tree clauses)
t = mark_rvalue_use (t);
if (!processing_template_decl)
{
+ if (TREE_CODE (OMP_CLAUSE_DECL (c)) == PARM_DECL)
+ t = maybe_constant_value (t);
t = fold_build_cleanup_point_expr (TREE_TYPE (t), t);
if (TREE_CODE (TREE_TYPE (OMP_CLAUSE_DECL (c)))
== POINTER_TYPE)
@@ -694,6 +694,7 @@ typedef struct ipa_parm_adjustment ipa_p
typedef vec<ipa_parm_adjustment_t> ipa_parm_adjustment_vec;
vec<tree> ipa_get_vector_of_formal_parms (tree fndecl);
+vec<tree> ipa_get_vector_of_formal_parm_types (tree fntype);
void ipa_modify_formal_parameters (tree fndecl, ipa_parm_adjustment_vec);
void ipa_modify_call_arguments (struct cgraph_edge *, gimple,
ipa_parm_adjustment_vec);
@@ -3347,8 +3347,8 @@ ipa_get_vector_of_formal_parms (tree fnd
/* Return a heap allocated vector containing types of formal parameters of
function type FNTYPE. */
-static inline vec<tree>
-get_vector_of_formal_parm_types (tree fntype)
+vec<tree>
+ipa_get_vector_of_formal_parm_types (tree fntype)
{
vec<tree> types;
int count = 0;
@@ -3385,7 +3385,7 @@ ipa_modify_formal_parameters (tree fndec
{
last_parm_void = (TREE_VALUE (tree_last (old_arg_types))
== void_type_node);
- otypes = get_vector_of_formal_parm_types (orig_type);
+ otypes = ipa_get_vector_of_formal_parm_types (orig_type);
if (last_parm_void)
gcc_assert (oparms.length () + 1 == otypes.length ());
else
@@ -1521,34 +1521,35 @@ hook_int_uint_mode_1)
HOOK_VECTOR_END (sched)
-/* Functions relating to Cilk Plus. */
+/* Functions relating to OpenMP and Cilk Plus SIMD clones. */
#undef HOOK_PREFIX
-#define HOOK_PREFIX "TARGET_CILKPLUS_"
-HOOK_VECTOR (TARGET_CILKPLUS, cilkplus)
+#define HOOK_PREFIX "TARGET_SIMD_CLONE_"
+HOOK_VECTOR (TARGET_SIMD_CLONE, simd_clone)
DEFHOOK
-(default_vecsize_mangle,
-"This hook should return the default mangling character when no vector\n\
-size can be determined by examining the Cilk Plus @code{processor} clause.\n\
-This is as specified in the Intel Vector ABI document.\n\
-\n\
-This hook, as well as @code{max_vector_size_for_isa} below must be set\n\
-to support the Cilk Plus @code{processor} clause.\n\
-\n\
-The only argument is a @var{cgraph_node} containing the clone.",
-char, (struct cgraph_node *), NULL)
+(compute_vecsize_and_simdlen,
+"This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}\n\
+fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also\n\
+@var{simdlen} field if it was previously 0.\n\
+The hook should return 0 if SIMD clones shouldn't be emitted,\n\
+or number of @var{vecsize_mangle} variants that should be emitted.",
+int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int), NULL)
DEFHOOK
-(vecsize_for_mangle,
-"This hook returns the maximum hardware vector size in bits for a given\n\
-mangling character. The character is as described in Intel's\n\
-Vector ABI (see @var{ISA} character in the section on mangling).\n\
-\n\
-This hook must be defined in order to support the Cilk Plus @code{processor}\n\
-clause.",
-unsigned int, (char), NULL)
+(adjust,
+"This hook should add implicit @code{attribute(target(\"...\"))} attribute\n\
+to SIMD clone @var{node} if needed.",
+void, (struct cgraph_node *), NULL)
-HOOK_VECTOR_END (cilkplus)
+DEFHOOK
+(usable,
+"This hook should return -1 if SIMD clone @var{node} shouldn't be used\n\
+in vectorized loops in current function, or non-negative number if it is\n\
+usable. In that case, the smaller the number is, the more desirable it is\n\
+to use it.",
+int, (struct cgraph_node *), NULL)
+
+HOOK_VECTOR_END (simd_clone)
/* Functions relating to vectorization. */
#undef HOOK_PREFIX
@@ -3189,7 +3189,7 @@ vect_analyze_data_refs (loop_vec_info lo
if (fndecl != NULL_TREE)
{
struct cgraph_node *node = cgraph_get_node (fndecl);
- if (node != NULL && node->has_simd_clones)
+ if (node != NULL && node->simd_clones != NULL)
{
unsigned int j, n = gimple_call_num_args (stmt);
for (j = 0; j < n; j++)
@@ -93,6 +93,8 @@ extern bool target_default_pointer_addre
struct stdarg_info;
struct spec_info_def;
struct hard_reg_set_container;
+struct cgraph_node;
+struct cgraph_simd_clone;
/* The struct used by the secondary_reload target hook. */
typedef struct secondary_reload_info
@@ -4422,9 +4422,11 @@ address; but often a machine-dependent
@hook TARGET_VECTORIZE_BUILTIN_GATHER
-@hook TARGET_CILKPLUS_DEFAULT_VECSIZE_MANGLE
+@hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
-@hook TARGET_CILKPLUS_VECSIZE_FOR_MANGLE
+@hook TARGET_SIMD_CLONE_ADJUST
+
+@hook TARGET_SIMD_CLONE_USABLE
@node Anchored Addresses
@section Anchored Addresses
@@ -5818,24 +5818,24 @@ The default is @code{NULL_TREE} which me
loads.
@end deftypefn
-@deftypefn {Target Hook} char TARGET_CILKPLUS_DEFAULT_VECSIZE_MANGLE (struct cgraph_node *@var{})
-This hook should return the default mangling character when no vector
-size can be determined by examining the Cilk Plus @code{processor} clause.
-This is as specified in the Intel Vector ABI document.
-
-This hook, as well as @code{max_vector_size_for_isa} below must be set
-to support the Cilk Plus @code{processor} clause.
-
-The only argument is a @var{cgraph_node} containing the clone.
+@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int})
+This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
+fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
+@var{simdlen} field if it was previously 0.
+The hook should return 0 if SIMD clones shouldn't be emitted,
+or number of @var{vecsize_mangle} variants that should be emitted.
@end deftypefn
-@deftypefn {Target Hook} {unsigned int} TARGET_CILKPLUS_VECSIZE_FOR_MANGLE (char)
-This hook returns the maximum hardware vector size in bits for a given
-mangling character. The character is as described in Intel's
-Vector ABI (see @var{ISA} character in the section on mangling).
+@deftypefn {Target Hook} void TARGET_SIMD_CLONE_ADJUST (struct cgraph_node *@var{})
+This hook should add implicit @code{attribute(target("..."))} attribute
+to SIMD clone @var{node} if needed.
+@end deftypefn
-This hook must be defined in order to support the Cilk Plus @code{processor}
-clause.
+@deftypefn {Target Hook} int TARGET_SIMD_CLONE_USABLE (struct cgraph_node *@var{})
+This hook should return -1 if SIMD clone @var{node} shouldn't be used
+in vectorized loops in current function, or non-negative number if it is
+usable. In that case, the smaller the number is, the more desirable it is
+to use it.
@end deftypefn
@node Anchored Addresses
@@ -239,5 +239,5 @@ struct D
void
f38 (D &d)
{
- d.f37 <12> (6);
+ d.f37 <16> (6);
}
@@ -1,8 +1,7 @@
-/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-fopenmp -fdump-tree-optimized -O -msse2" } */
+/* { dg-options "-fopenmp -fdump-tree-optimized -O" } */
-#pragma omp declare simd inbranch uniform(c) linear(b:66) // addit.simdclone.2
-#pragma omp declare simd notinbranch aligned(c:32) // addit.simdclone.1
+#pragma omp declare simd inbranch uniform(c) linear(b:66)
+#pragma omp declare simd notinbranch aligned(c:32)
int addit(int a, int b, int *c)
{
return a + b;
@@ -15,7 +14,13 @@ float setArray(float *a, float x, int k)
return a[k];
}
-/* { dg-final { scan-tree-dump "clone.0 \\(_ZGVxN4ua32vl_setArray" "optimized" } } */
-/* { dg-final { scan-tree-dump "clone.1 \\(_ZGVxN4vvva32_addit" "optimized" } } */
-/* { dg-final { scan-tree-dump "clone.2 \\(_ZGVxM4vl66u_addit" "optimized" } } */
+/* { dg-final { scan-tree-dump "_ZGVbN4ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVbN4vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVbM4vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVcN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVcN4vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVcM4vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVdN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVdN8vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVdM8vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
@@ -1,5 +1,4 @@
-/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-fopenmp -fdump-tree-optimized -O2 -msse2" } */
+/* { dg-options "-fopenmp -fdump-tree-optimized -O2" } */
/* Test that if there is no *inbranch clauses, that both the masked and
the unmasked version are created. */
@@ -10,6 +9,10 @@ int addit(int a, int b, int c)
return a + b;
}
-/* { dg-final { scan-tree-dump "clone.* \\(_ZGVxN4vvv_addit" "optimized" } } */
-/* { dg-final { scan-tree-dump "clone.* \\(_ZGVxM4vvv_addit" "optimized" } } */
+/* { dg-final { scan-tree-dump "_ZGVbN4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVbM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVcN4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVcM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVdN8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "_ZGVdM8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
@@ -0,0 +1,58 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+int array[N];
+
+#pragma omp declare simd simdlen(4) notinbranch
+#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3)
+#pragma omp declare simd simdlen(8) notinbranch
+#pragma omp declare simd simdlen(8) notinbranch uniform(b) linear(c:3)
+__attribute__((noinline)) int
+foo (int a, int b, int c)
+{
+ if (a < 30)
+ return 5;
+ return a + b + c;
+}
+
+__attribute__((noinline, noclone)) void
+bar ()
+{
+ int i;
+#pragma omp simd
+ for (i = 0; i < N; ++i)
+ array[i] = foo (i, 123, i * 3);
+}
+
+__attribute__((noinline, noclone)) void
+baz ()
+{
+ int i;
+#pragma omp simd
+ for (i = 0; i < N; ++i)
+ array[i] = foo (i, array[i], i * 3);
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ bar ();
+ for (i = 0; i < N; i++)
+ if (array[i] != (i < 30 ? 5 : i * 4 + 123))
+ abort ();
+ baz ();
+ for (i = 0; i < N; i++)
+ if (array[i] != (i < 30 ? 5 : i * 8 + 123))
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,94 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+int a[N], b[N];
+long int c[N];
+unsigned char d[N];
+
+#pragma omp declare simd simdlen(8) notinbranch
+__attribute__((noinline)) int
+foo (long int a, int b, int c)
+{
+ return a + b + c;
+}
+
+#pragma omp declare simd simdlen(8) notinbranch
+__attribute__((noinline)) long int
+bar (int a, int b, long int c)
+{
+ return a + b + c;
+}
+
+__attribute__((noinline)) void
+fn1 (void)
+{
+ int i;
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ a[i] = foo (c[i], a[i], b[i]) + 6;
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ c[i] = bar (a[i], b[i], c[i]) * 2;
+}
+
+__attribute__((noinline)) void
+fn2 (void)
+{
+ int i;
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ {
+ a[i] = foo (c[i], a[i], b[i]) + 6;
+ d[i]++;
+ }
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ {
+ c[i] = bar (a[i], b[i], c[i]) * 2;
+ d[i] /= 2;
+ }
+}
+
+__attribute__((noinline)) void
+fn3 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a[i] = i * 2;
+ b[i] = 17 + (i % 37);
+ c[i] = (i & 63);
+ d[i] = 16 + i;
+ }
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ fn3 ();
+ fn1 ();
+ for (i = 0; i < N; i++)
+ if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
+ || b[i] != 17 + (i % 37)
+ || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63))
+ abort ();
+ fn3 ();
+ fn2 ();
+ for (i = 0; i < N; i++)
+ if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
+ || b[i] != 17 + (i % 37)
+ || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63)
+ || d[i] != ((unsigned char) (17 + i)) / 2)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,74 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+int a[N];
+long long int b[N];
+short c[N];
+
+#pragma omp declare simd
+#pragma omp declare simd uniform(b) linear(c:3)
+__attribute__((noinline)) short
+foo (int a, long long int b, int c)
+{
+ return a + b + c;
+}
+
+__attribute__((noinline, noclone)) void
+bar (int x)
+{
+ int i;
+ if (x == 0)
+ {
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ c[i] = foo (a[i], b[i], c[i]);
+ }
+ else
+ {
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ c[i] = foo (a[i], x, i * 3);
+ }
+}
+
+__attribute__((noinline, noclone)) void
+baz (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2 * i;
+ b[i] = -7 * i + 6;
+ c[i] = (i & 31) << 4;
+ }
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ baz ();
+ bar (0);
+ for (i = 0; i < N; i++)
+ if (a[i] != 2 * i || b[i] != 6 - 7 * i
+ || c[i] != 6 - 5 * i + ((i & 31) << 4))
+ abort ();
+ else
+ a[i] = c[i];
+ bar (17);
+ for (i = 0; i < N; i++)
+ if (a[i] != 6 - 5 * i + ((i & 31) << 4)
+ || b[i] != 6 - 7 * i
+ || c[i] != 23 - 2 * i + ((i & 31) << 4))
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,52 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+int array[N] __attribute__((aligned (32)));
+
+#pragma omp declare simd simdlen(4) notinbranch aligned(a:16) uniform(a) linear(b)
+#pragma omp declare simd simdlen(4) notinbranch aligned(a:32) uniform(a) linear(b)
+#pragma omp declare simd simdlen(8) notinbranch aligned(a:16) uniform(a) linear(b)
+#pragma omp declare simd simdlen(8) notinbranch aligned(a:32) uniform(a) linear(b)
+__attribute__((noinline)) void
+foo (int *a, int b, int c)
+{
+ a[b] = c;
+}
+
+__attribute__((noinline, noclone)) void
+bar ()
+{
+ int i;
+#pragma omp simd
+ for (i = 0; i < N; ++i)
+ foo (array, i, i * array[i]);
+}
+
+__attribute__((noinline, noclone)) void
+baz ()
+{
+ int i;
+ for (i = 0; i < N; i++)
+ array[i] = 5 * (i & 7);
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ baz ();
+ bar ();
+ for (i = 0; i < N; i++)
+ if (array[i] != 5 * (i & 7) * i)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,45 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+int d[N], e[N];
+
+#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3)
+__attribute__((noinline)) int
+foo (int a, int b, int c)
+{
+ if (a < 30)
+ return 5;
+ return a + b + c;
+}
+
+__attribute__((noinline, noclone)) void
+bar ()
+{
+ int i;
+#pragma omp simd
+ for (i = 0; i < N; ++i)
+ {
+ d[i] = foo (i, 123, i * 3);
+ e[i] = e[i] + i;
+ }
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ bar ();
+ for (i = 0; i < N; i++)
+ if (d[i] != (i < 30 ? 5 : i * 4 + 123) || e[i] != i)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+
+#include "vect-simd-clone-10.h"
+
+#pragma omp declare simd notinbranch
+extern int
+foo (long int a, int b, int c)
+{
+ return a + b + c;
+}
+
+#pragma omp declare simd notinbranch
+extern long int
+bar (int a, int b, long int c)
+{
+ return a + b + c;
+}
@@ -0,0 +1,4 @@
+#pragma omp declare simd notinbranch
+extern int foo (long int a, int b, int c);
+#pragma omp declare simd notinbranch
+extern long int bar (int a, int b, long int c);
@@ -0,0 +1,83 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-additional-sources vect-simd-clone-10a.c } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+int a[N], b[N];
+long int c[N];
+unsigned char d[N];
+
+#include "vect-simd-clone-10.h"
+
+__attribute__((noinline)) void
+fn1 (void)
+{
+ int i;
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ a[i] = foo (c[i], a[i], b[i]) + 6;
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ c[i] = bar (a[i], b[i], c[i]) * 2;
+}
+
+__attribute__((noinline)) void
+fn2 (void)
+{
+ int i;
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ {
+ a[i] = foo (c[i], a[i], b[i]) + 6;
+ d[i]++;
+ }
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ {
+ c[i] = bar (a[i], b[i], c[i]) * 2;
+ d[i] /= 2;
+ }
+}
+
+__attribute__((noinline)) void
+fn3 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a[i] = i * 2;
+ b[i] = 17 + (i % 37);
+ c[i] = (i & 63);
+ d[i] = 16 + i;
+ }
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ fn3 ();
+ fn1 ();
+ for (i = 0; i < N; i++)
+ if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
+ || b[i] != 17 + (i % 37)
+ || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63))
+ abort ();
+ fn3 ();
+ fn2 ();
+ for (i = 0; i < N; i++)
+ if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
+ || b[i] != 17 + (i % 37)
+ || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63)
+ || d[i] != ((unsigned char) (17 + i)) / 2)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,43 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+int d[N], e[N];
+
+#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3)
+__attribute__((noinline)) long long int
+foo (int a, int b, int c)
+{
+ return a + b + c;
+}
+
+__attribute__((noinline, noclone)) void
+bar ()
+{
+ int i;
+#pragma omp simd
+ for (i = 0; i < N; ++i)
+ {
+ d[i] = foo (i, 123, i * 3);
+ e[i] = e[i] + i;
+ }
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ bar ();
+ for (i = 0; i < N; i++)
+ if (d[i] != i * 4 + 123 || e[i] != i)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,48 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+float d[N];
+int e[N];
+unsigned short f[N];
+
+#pragma omp declare simd simdlen(8) notinbranch uniform(b)
+__attribute__((noinline)) float
+foo (float a, float b, float c)
+{
+ if (a < 30)
+ return 5.0f;
+ return a + b + c;
+}
+
+__attribute__((noinline, noclone)) void
+bar ()
+{
+ int i;
+#pragma omp simd
+ for (i = 0; i < N; ++i)
+ {
+ d[i] = foo (i, 123, i * 3);
+ e[i] = e[i] * 3;
+ f[i] = f[i] + 1;
+ }
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ bar ();
+ for (i = 0; i < N; i++)
+ if (d[i] != (i < 30 ? 5.0f : i * 4 + 123.0f) || e[i] || f[i] != 1)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,74 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+int a[N];
+long long int b[N];
+short c[N];
+
+#pragma omp declare simd
+#pragma omp declare simd uniform(b) linear(c:3)
+__attribute__((noinline)) short
+foo (int a, long long int b, short c)
+{
+ return a + b + c;
+}
+
+__attribute__((noinline, noclone)) void
+bar (int x)
+{
+ int i;
+ if (x == 0)
+ {
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ c[i] = foo (a[i], b[i], c[i]);
+ }
+ else
+ {
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ c[i] = foo (a[i], x, i * 3);
+ }
+}
+
+__attribute__((noinline, noclone)) void
+baz (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2 * i;
+ b[i] = -7 * i + 6;
+ c[i] = (i & 31) << 4;
+ }
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ baz ();
+ bar (0);
+ for (i = 0; i < N; i++)
+ if (a[i] != 2 * i || b[i] != 6 - 7 * i
+ || c[i] != 6 - 5 * i + ((i & 31) << 4))
+ abort ();
+ else
+ a[i] = c[i];
+ bar (17);
+ for (i = 0; i < N; i++)
+ if (a[i] != 6 - 5 * i + ((i & 31) << 4)
+ || b[i] != 6 - 7 * i
+ || c[i] != 23 - 2 * i + ((i & 31) << 4))
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -0,0 +1,94 @@
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include "tree-vect.h"
+
+#ifndef N
+#define N 1024
+#endif
+
+int a[N], b[N];
+long int c[N];
+unsigned char d[N];
+
+#pragma omp declare simd notinbranch
+__attribute__((noinline)) static int
+foo (long int a, int b, int c)
+{
+ return a + b + c;
+}
+
+#pragma omp declare simd notinbranch
+__attribute__((noinline)) static long int
+bar (int a, int b, long int c)
+{
+ return a + b + c;
+}
+
+__attribute__((noinline)) void
+fn1 (void)
+{
+ int i;
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ a[i] = foo (c[i], a[i], b[i]) + 6;
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ c[i] = bar (a[i], b[i], c[i]) * 2;
+}
+
+__attribute__((noinline)) void
+fn2 (void)
+{
+ int i;
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ {
+ a[i] = foo (c[i], a[i], b[i]) + 6;
+ d[i]++;
+ }
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ {
+ c[i] = bar (a[i], b[i], c[i]) * 2;
+ d[i] /= 2;
+ }
+}
+
+__attribute__((noinline)) void
+fn3 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a[i] = i * 2;
+ b[i] = 17 + (i % 37);
+ c[i] = (i & 63);
+ d[i] = 16 + i;
+ }
+}
+
+int
+main ()
+{
+ int i;
+ check_vect ();
+ fn3 ();
+ fn1 ();
+ for (i = 0; i < N; i++)
+ if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
+ || b[i] != 17 + (i % 37)
+ || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63))
+ abort ();
+ fn3 ();
+ fn2 ();
+ for (i = 0; i < N; i++)
+ if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
+ || b[i] != 17 + (i % 37)
+ || c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63)
+ || d[i] != ((unsigned char) (17 + i)) / 2)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -256,22 +256,27 @@ struct GTY(()) cgraph_clone_info
bitmap combined_args_to_skip;
};
-enum simd_clone_arg_type
+enum cgraph_simd_clone_arg_type
{
SIMD_CLONE_ARG_TYPE_VECTOR,
SIMD_CLONE_ARG_TYPE_UNIFORM,
SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP,
- SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
+ SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP,
+ SIMD_CLONE_ARG_TYPE_MASK
};
/* Function arguments in the original function of a SIMD clone.
Supplementary data for `struct simd_clone'. */
-struct GTY(()) simd_clone_arg {
- /* Original function argument as it orignally existed in
+struct GTY(()) cgraph_simd_clone_arg {
+ /* Original function argument as it originally existed in
DECL_ARGUMENTS. */
tree orig_arg;
+ /* orig_arg's function (or for extern functions type from
+ TYPE_ARG_TYPES). */
+ tree orig_type;
+
/* If argument is a vector, this holds the vector version of
orig_arg that after adjusting the argument types will live in
DECL_ARGUMENTS. Otherwise, this is NULL.
@@ -280,6 +285,9 @@ struct GTY(()) simd_clone_arg {
vector(simdlen) __typeof__(orig_arg) new_arg. */
tree vector_arg;
+ /* vector_arg's type (or for extern functions new vector type. */
+ tree vector_type;
+
/* If argument is a vector, this holds the array where the simd
argument is held while executing the simd clone function. This
is a local variable in the cloned function. Its content is
@@ -291,7 +299,7 @@ struct GTY(()) simd_clone_arg {
/* A SIMD clone's argument can be either linear (constant or
variable), uniform, or vector. */
- enum simd_clone_arg_type arg_type;
+ enum cgraph_simd_clone_arg_type arg_type;
/* For arg_type SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP this is
the constant linear step, if arg_type is
@@ -305,7 +313,7 @@ struct GTY(()) simd_clone_arg {
/* Specific data for a SIMD function clone. */
-struct GTY(()) simd_clone {
+struct GTY(()) cgraph_simd_clone {
/* Number of words in the SIMD lane associated with this clone. */
unsigned int simdlen;
@@ -313,8 +321,11 @@ struct GTY(()) simd_clone {
usually the number of named arguments in FNDECL. */
unsigned int nargs;
- /* Max hardware vector size in bits. */
- unsigned int hw_vector_size;
+ /* Max hardware vector size in bits for integral vectors. */
+ unsigned int vecsize_int;
+
+ /* Max hardware vector size in bits for floating point vectors. */
+ unsigned int vecsize_float;
/* The mangling character for a given vector size. This is is used
to determine the ISA mangling bit as specified in the Intel
@@ -328,8 +339,14 @@ struct GTY(()) simd_clone {
/* True if this is a Cilk Plus variant. */
unsigned int cilk_elemental : 1;
+ /* Doubly linked list of SIMD clones. */
+ struct cgraph_node *prev_clone, *next_clone;
+
+ /* Original cgraph node the SIMD clones were created for. */
+ struct cgraph_node *origin;
+
/* Annotated function arguments for the original function. */
- struct simd_clone_arg GTY((length ("%h.nargs"))) args[1];
+ struct cgraph_simd_clone_arg GTY((length ("%h.nargs"))) args[1];
};
@@ -362,11 +379,9 @@ public:
/* If this is a SIMD clone, this points to the SIMD specific
information for it. */
- struct simd_clone *simdclone;
-
- /* If this is a SIMD clone, this points to the original scalar
- function. */
- struct cgraph_node *simdclone_of;
+ struct cgraph_simd_clone *simdclone;
+ /* If this function has SIMD clones, this points to the first clone. */
+ struct cgraph_node *simd_clones;
/* Interprocedural passes scheduled to have their transform functions
applied next time we execute local pass on them. We maintain it
@@ -411,8 +426,6 @@ public:
/* ?? We should be able to remove this. We have enough bits in
cgraph to calculate it. */
unsigned tm_clone : 1;
- /* True if this function has SIMD clones. */
- unsigned has_simd_clones : 1;
/* True if this decl is a dispatcher for function versions. */
unsigned dispatcher_function : 1;
};
@@ -819,7 +832,6 @@ void cgraph_speculative_call_info (struc
struct cgraph_edge *&,
struct ipa_ref *&);
extern bool gimple_check_call_matching_types (gimple, tree, bool);
-struct cgraph_node *get_simd_clone (struct cgraph_node *, tree);
/* In cgraphunit.c */
struct asm_node *add_asm_node (tree);
@@ -2154,7 +2154,7 @@ vectorizable_simd_clone_call (gimple stm
vec<simd_call_arg_info> arginfo = vNULL;
vec<tree> vargs = vNULL;
size_t i, nargs;
- tree lhs, rtype;
+ tree lhs, rtype, ratype;
vec<constructor_elt, va_gc> *ret_ctor_elts;
/* Is STMT a vectorizable call? */
@@ -2166,7 +2166,7 @@ vectorizable_simd_clone_call (gimple stm
return false;
struct cgraph_node *node = cgraph_get_node (fndecl);
- if (node == NULL || !node->has_simd_clones)
+ if (node == NULL || node->simd_clones == NULL)
return false;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
@@ -2238,70 +2238,77 @@ vectorizable_simd_clone_call (gimple stm
}
unsigned int badness = 0;
- /* FIXME: Nasty kludge until we figure out where to put the clone
- list-- perhaps, next_sibling_clone/prev_sibling_clone in
- cgraph_node ??. */
- struct cgraph_node *bestn = NULL, *n;
- FOR_EACH_FUNCTION (n)
- if (n->simdclone_of == node)
- {
- unsigned int this_badness = 0;
- if (n->simdclone->simdlen
- > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- || n->simdclone->nargs != nargs)
- continue;
- if (n->simdclone->simdlen
- < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
- this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
- - exact_log2 (n->simdclone->simdlen)) * 1024;
- if (n->simdclone->inbranch)
- this_badness += 2048;
- /* FORNOW: Have to add code to add the mask argument. */
- if (n->simdclone->inbranch)
- continue;
- for (i = 0; i < nargs; i++)
- {
- switch (n->simdclone->args[i].arg_type)
- {
- case SIMD_CLONE_ARG_TYPE_VECTOR:
- if (arginfo[i].vectype == NULL_TREE
- || arginfo[i].linear_step)
- this_badness += 64;
- break;
- case SIMD_CLONE_ARG_TYPE_UNIFORM:
- if (arginfo[i].vectype != NULL_TREE)
- i = -1;
- break;
- case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
- if (arginfo[i].vectype == NULL_TREE
- || (arginfo[i].linear_step
- != n->simdclone->args[i].linear_step))
- i = -1;
- break;
- case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
- /* FORNOW */
+ struct cgraph_node *bestn = NULL;
+ for (struct cgraph_node *n = node->simd_clones; n != NULL;
+ n = n->simdclone->next_clone)
+ {
+ unsigned int this_badness = 0;
+ if (n->simdclone->simdlen
+ > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ || n->simdclone->nargs != nargs)
+ continue;
+ if (n->simdclone->simdlen
+ < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+ this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+ - exact_log2 (n->simdclone->simdlen)) * 1024;
+ if (n->simdclone->inbranch)
+ this_badness += 2048;
+ int target_badness = targetm.simd_clone.usable (n);
+ if (target_badness < 0)
+ continue;
+ this_badness += target_badness * 512;
+ /* FORNOW: Have to add code to add the mask argument. */
+ if (n->simdclone->inbranch)
+ continue;
+ for (i = 0; i < nargs; i++)
+ {
+ switch (n->simdclone->args[i].arg_type)
+ {
+ case SIMD_CLONE_ARG_TYPE_VECTOR:
+ if (!useless_type_conversion_p
+ (n->simdclone->args[i].orig_type,
+ TREE_TYPE (gimple_call_arg (stmt, i))))
i = -1;
- break;
- }
- if (i == (size_t) -1)
+ else if (arginfo[i].vectype == NULL_TREE
+ || arginfo[i].linear_step)
+ this_badness += 64;
break;
- if (n->simdclone->args[i].alignment > arginfo[i].align)
- {
+ case SIMD_CLONE_ARG_TYPE_UNIFORM:
+ if (arginfo[i].vectype != NULL_TREE)
i = -1;
- break;
- }
- if (arginfo[i].align)
- this_badness += (exact_log2 (arginfo[i].align)
- - exact_log2 (n->simdclone->args[i].alignment));
- }
- if (i == (size_t) -1)
- continue;
- if (bestn == NULL || this_badness < badness)
- {
- bestn = n;
- badness = this_badness;
- }
- }
+ break;
+ case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
+ if (arginfo[i].vectype == NULL_TREE
+ || (arginfo[i].linear_step
+ != n->simdclone->args[i].linear_step))
+ i = -1;
+ break;
+ case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
+ /* FORNOW */
+ i = -1;
+ break;
+ case SIMD_CLONE_ARG_TYPE_MASK:
+ gcc_unreachable ();
+ }
+ if (i == (size_t) -1)
+ break;
+ if (n->simdclone->args[i].alignment > arginfo[i].align)
+ {
+ i = -1;
+ break;
+ }
+ if (arginfo[i].align)
+ this_badness += (exact_log2 (arginfo[i].align)
+ - exact_log2 (n->simdclone->args[i].alignment));
+ }
+ if (i == (size_t) -1)
+ continue;
+ if (bestn == NULL || this_badness < badness)
+ {
+ bestn = n;
+ badness = this_badness;
+ }
+ }
if (bestn == NULL)
{
@@ -2309,6 +2316,22 @@ vectorizable_simd_clone_call (gimple stm
return false;
}
+ for (i = 0; i < nargs; i++)
+ if (arginfo[i].vectype == NULL_TREE
+ && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
+ {
+ arginfo[i].vectype
+ = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
+ i)));
+ if (arginfo[i].vectype == NULL
+ || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
+ > bestn->simdclone->simdlen))
+ {
+ arginfo.release ();
+ return false;
+ }
+ }
+
fndecl = bestn->decl;
nunits = bestn->simdclone->simdlen;
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -2347,10 +2370,16 @@ vectorizable_simd_clone_call (gimple stm
scalar_dest = gimple_call_lhs (stmt);
vec_dest = NULL_TREE;
rtype = NULL_TREE;
+ ratype = NULL_TREE;
if (scalar_dest)
{
vec_dest = vect_create_destination_var (scalar_dest, vectype);
rtype = TREE_TYPE (TREE_TYPE (fndecl));
+ if (TREE_CODE (rtype) == ARRAY_TYPE)
+ {
+ ratype = rtype;
+ rtype = TREE_TYPE (ratype);
+ }
}
prev_stmt_info = NULL;
@@ -2364,81 +2393,90 @@ vectorizable_simd_clone_call (gimple stm
for (i = 0; i < nargs; i++)
{
- unsigned int k, l;
+ unsigned int k, l, m, o;
tree atype;
op = gimple_call_arg (stmt, i);
switch (bestn->simdclone->args[i].arg_type)
{
case SIMD_CLONE_ARG_TYPE_VECTOR:
- /* FIXME */
- atype = TREE_TYPE (bestn->simdclone->args[i].vector_arg);
- gcc_assert (TYPE_VECTOR_SUBPARTS (atype) == nunits);
- if (nunits < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
+ atype = bestn->simdclone->args[i].vector_type;
+ o = nunits / TYPE_VECTOR_SUBPARTS (atype);
+ for (m = j * o; m < (j + 1) * o; m++)
{
- unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
- k = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype) / nunits;
- gcc_assert ((k & (k - 1)) == 0);
- if (j == 0)
- vec_oprnd0
- = vect_get_vec_def_for_operand (op, stmt, NULL);
- else
+ if (TYPE_VECTOR_SUBPARTS (atype)
+ < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
{
- vec_oprnd0 = arginfo[i].op;
- if ((j & (k - 1)) == 0)
+ unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
+ k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
+ / TYPE_VECTOR_SUBPARTS (atype));
+ gcc_assert ((k & (k - 1)) == 0);
+ if (m == 0)
vec_oprnd0
- = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
- vec_oprnd0);
+ = vect_get_vec_def_for_operand (op, stmt, NULL);
+ else
+ {
+ vec_oprnd0 = arginfo[i].op;
+ if ((m & (k - 1)) == 0)
+ vec_oprnd0
+ = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
+ vec_oprnd0);
+ }
+ arginfo[i].op = vec_oprnd0;
+ vec_oprnd0
+ = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
+ build_int_cst (integer_type_node, prec),
+ build_int_cst (integer_type_node,
+ (m & (k - 1)) * prec));
+ new_stmt
+ = gimple_build_assign_with_ops (BIT_FIELD_REF,
+ make_ssa_name (atype,
+ NULL),
+ vec_oprnd0, NULL_TREE);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ vargs.safe_push (gimple_assign_lhs (new_stmt));
}
- arginfo[i].op = vec_oprnd0;
- vec_oprnd0 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
- build_int_cst (integer_type_node, prec),
- build_int_cst (integer_type_node,
- (j & (k - 1)) * prec));
- new_stmt
- = gimple_build_assign_with_ops (BIT_FIELD_REF,
- make_ssa_name (atype,
- NULL),
- vec_oprnd0, NULL_TREE);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
- vargs.quick_push (gimple_assign_lhs (new_stmt));
- break;
- }
- k = nunits / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
- gcc_assert ((k & (k - 1)) == 0);
- vec<constructor_elt, va_gc> *ctor_elts;
- if (k != 1)
- vec_alloc (ctor_elts, k);
- else
- ctor_elts = NULL;
- for (l = 0; l < k; l++)
- {
- if (j == 0 && l == 0)
- vec_oprnd0
- = vect_get_vec_def_for_operand (op, stmt, NULL);
else
- vec_oprnd0
- = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
- arginfo[i].op);
- arginfo[i].op = vec_oprnd0;
- if (k == 1)
- break;
- CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, vec_oprnd0);
- }
- if (k == 1)
- {
- vargs.quick_push (vec_oprnd0);
- break;
+ {
+ k = (TYPE_VECTOR_SUBPARTS (atype)
+ / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
+ gcc_assert ((k & (k - 1)) == 0);
+ vec<constructor_elt, va_gc> *ctor_elts;
+ if (k != 1)
+ vec_alloc (ctor_elts, k);
+ else
+ ctor_elts = NULL;
+ for (l = 0; l < k; l++)
+ {
+ if (m == 0 && l == 0)
+ vec_oprnd0
+ = vect_get_vec_def_for_operand (op, stmt, NULL);
+ else
+ vec_oprnd0
+ = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
+ arginfo[i].op);
+ arginfo[i].op = vec_oprnd0;
+ if (k == 1)
+ break;
+ CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
+ vec_oprnd0);
+ }
+ if (k == 1)
+ vargs.safe_push (vec_oprnd0);
+ else
+ {
+ vec_oprnd0 = build_constructor (atype, ctor_elts);
+ new_stmt
+ = gimple_build_assign_with_ops
+ (CONSTRUCTOR, make_ssa_name (atype, NULL),
+ vec_oprnd0, NULL_TREE);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ vargs.safe_push (gimple_assign_lhs (new_stmt));
+ }
+ }
}
- vec_oprnd0 = build_constructor (atype, ctor_elts);
- new_stmt
- = gimple_build_assign_with_ops (CONSTRUCTOR,
- make_ssa_name (atype, NULL),
- vec_oprnd0, NULL_TREE);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
- vargs.quick_push (gimple_assign_lhs (new_stmt));
break;
case SIMD_CLONE_ARG_TYPE_UNIFORM:
- vargs.quick_push (op);
+ vargs.safe_push (op);
break;
case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
if (j == 0)
@@ -2481,7 +2519,7 @@ vectorizable_simd_clone_call (gimple stm
add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
UNKNOWN_LOCATION);
arginfo[i].op = phi_res;
- vargs.quick_push (phi_res);
+ vargs.safe_push (phi_res);
}
else
{
@@ -2499,7 +2537,7 @@ vectorizable_simd_clone_call (gimple stm
= gimple_build_assign_with_ops (code, new_temp,
arginfo[i].op, tcst);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
- vargs.quick_push (new_temp);
+ vargs.safe_push (new_temp);
}
break;
case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
@@ -2511,8 +2549,11 @@ vectorizable_simd_clone_call (gimple stm
new_stmt = gimple_build_call_vec (fndecl, vargs);
if (vec_dest)
{
- gcc_assert (TYPE_VECTOR_SUBPARTS (rtype) == nunits);
- if (TYPE_VECTOR_SUBPARTS (vectype) == TYPE_VECTOR_SUBPARTS (rtype))
+ gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
+ if (ratype)
+ new_temp = create_tmp_var (ratype, NULL);
+ else if (TYPE_VECTOR_SUBPARTS (vectype)
+ == TYPE_VECTOR_SUBPARTS (rtype))
new_temp = make_ssa_name (vec_dest, new_stmt);
else
new_temp = make_ssa_name (rtype, new_stmt);
@@ -2522,21 +2563,28 @@ vectorizable_simd_clone_call (gimple stm
if (vec_dest)
{
- if (TYPE_VECTOR_SUBPARTS (vectype) < TYPE_VECTOR_SUBPARTS (rtype))
+ if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
{
unsigned int k, l;
unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
- k = (TYPE_VECTOR_SUBPARTS (rtype)
- / TYPE_VECTOR_SUBPARTS (vectype));
+ k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
gcc_assert ((k & (k - 1)) == 0);
for (l = 0; l < k; l++)
{
- tree t = build3 (BIT_FIELD_REF, vectype, new_temp,
- build_int_cst (integer_type_node, prec),
- build_int_cst (integer_type_node,
- l * prec));
+ tree t;
+ if (ratype)
+ {
+ t = build_fold_addr_expr (new_temp);
+ t = build2 (MEM_REF, vectype, t,
+ build_int_cst (TREE_TYPE (t),
+ l * prec / BITS_PER_UNIT));
+ }
+ else
+ t = build3 (BIT_FIELD_REF, vectype, new_temp,
+ build_int_cst (integer_type_node, prec),
+ build_int_cst (integer_type_node, l * prec));
new_stmt
- = gimple_build_assign_with_ops (BIT_FIELD_REF,
+ = gimple_build_assign_with_ops (TREE_CODE (t),
make_ssa_name (vectype,
NULL),
t, NULL_TREE);
@@ -2548,17 +2596,45 @@ vectorizable_simd_clone_call (gimple stm
prev_stmt_info = vinfo_for_stmt (new_stmt);
}
+
+ if (ratype)
+ {
+ tree clobber = build_constructor (ratype, NULL);
+ TREE_THIS_VOLATILE (clobber) = 1;
+ new_stmt = gimple_build_assign (new_temp, clobber);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ }
continue;
}
- else if (TYPE_VECTOR_SUBPARTS (vectype)
- > TYPE_VECTOR_SUBPARTS (rtype))
+ else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
{
unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
/ TYPE_VECTOR_SUBPARTS (rtype));
gcc_assert ((k & (k - 1)) == 0);
if ((j & (k - 1)) == 0)
vec_alloc (ret_ctor_elts, k);
- CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
+ if (ratype)
+ {
+ unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
+ for (m = 0; m < o; m++)
+ {
+ tree tem = build4 (ARRAY_REF, rtype, new_temp,
+ size_int (m), NULL_TREE, NULL_TREE);
+ new_stmt
+ = gimple_build_assign_with_ops (ARRAY_REF, rtype,
+ make_ssa_name (rtype,
+ NULL),
+ tem);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, tem);
+ }
+ tree clobber = build_constructor (ratype, NULL);
+ TREE_THIS_VOLATILE (clobber) = 1;
+ new_stmt = gimple_build_assign (new_temp, clobber);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ }
+ else
+ CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
if ((j & (k - 1)) != k - 1)
continue;
vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
@@ -2576,6 +2652,22 @@ vectorizable_simd_clone_call (gimple stm
prev_stmt_info = vinfo_for_stmt (new_stmt);
continue;
}
+ else if (ratype)
+ {
+ tree t = build_fold_addr_expr (new_temp);
+ t = build2 (MEM_REF, vectype, t,
+ build_int_cst (TREE_TYPE (t), 0));
+ new_stmt
+ = gimple_build_assign_with_ops (MEM_REF, vectype,
+ make_ssa_name (vec_dest,
+ NULL), t);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ tree clobber = build_constructor (ratype, NULL);
+ TREE_THIS_VOLATILE (clobber) = 1;
+ vect_finish_stmt_generation (stmt,
+ gimple_build_assign (new_temp,
+ clobber), gsi);
+ }
}
if (j == 0)