@@ -43375,39 +43375,167 @@ ix86_memmodel_check (unsigned HOST_WIDE_
return val;
}
-/* Return the default mangling character when no vector size can be
- determined from the `processor' clause. */
-
-static char
-ix86_cilkplus_default_vecsize_mangle (struct cgraph_node *clone
- ATTRIBUTE_UNUSED)
+/* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
+ CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
+ CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
+ or number of vecsize_mangle variants that should be emitted. */
+
+static int
+ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
+ struct cgraph_simd_clone *clonei,
+ tree base_type, int num)
{
- return 'x';
+ int ret = 1;
+
+ if (clonei->simdlen
+ && (clonei->simdlen < 2
+ || clonei->simdlen > 16
+ || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
+ {
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported simdlen %d\n", clonei->simdlen);
+ return 0;
+ }
+
+ tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
+ if (TREE_CODE (ret_type) != VOID_TYPE)
+ switch (TYPE_MODE (ret_type))
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case SFmode:
+ case DFmode:
+ /* case SCmode: */
+ /* case DCmode: */
+ break;
+ default:
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported return type %qT for simd\n", ret_type);
+ return 0;
+ }
+
+ tree t;
+ int i;
+
+ for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
+ /* FIXME: Shouldn't we allow such arguments if they are uniform? */
+ switch (TYPE_MODE (TREE_TYPE (t)))
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ case DImode:
+ case SFmode:
+ case DFmode:
+ /* case SCmode: */
+ /* case DCmode: */
+ break;
+ default:
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported argument type %qT for simd\n", TREE_TYPE (t));
+ return 0;
+ }
+
+ if (clonei->cilk_elemental)
+ {
+ /* Parse here processor clause. If not present, default to 'x'. */
+ clonei->vecsize_mangle = 'x';
+ }
+ else
+ {
+ clonei->vecsize_mangle = "xyY"[num];
+ ret = 3;
+ }
+ switch (clonei->vecsize_mangle)
+ {
+ case 'x':
+ clonei->vecsize_int = 128;
+ clonei->vecsize_float = 128;
+ break;
+ case 'y':
+ clonei->vecsize_int = 128;
+ clonei->vecsize_float = 256;
+ break;
+ case 'Y':
+ clonei->vecsize_int = 256;
+ clonei->vecsize_float = 256;
+ break;
+ }
+ if (clonei->simdlen == 0)
+ {
+ if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
+ clonei->simdlen = clonei->vecsize_int;
+ else
+ clonei->simdlen = clonei->vecsize_float;
+ clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
+ if (clonei->simdlen > 16)
+ clonei->simdlen = 16;
+ }
+ return ret;
}
-/* Return the hardware vector size (in bits) for a mangling
- character. */
+/* Add target attribute to SIMD clone NODE if needed. */
-static unsigned int
-ix86_cilkplus_vecsize_for_mangle (char mangle)
+static void
+ix86_simd_clone_adjust (struct cgraph_node *node)
{
- /* ?? Intel currently has no ISA encoding character for AVX-512. */
- switch (mangle)
+ const char *str = NULL;
+ gcc_assert (node->decl == cfun->decl);
+ switch (node->simdclone->vecsize_mangle)
{
case 'x':
- /* xmm (SSE2). */
- return 128;
+ if (!TARGET_SSE2)
+ str = "sse2";
+ break;
case 'y':
- /* ymm1 (AVX1). */
+ if (!TARGET_AVX)
+ str = "avx";
+ break;
case 'Y':
- /* ymm2 (AVX2). */
- return 256;
- case 'z':
- /* zmm (MIC). */
- return 512;
+ if (!TARGET_AVX2)
+ str = "avx2";
+ break;
default:
gcc_unreachable ();
+ }
+ if (str == NULL)
+ return;
+ push_cfun (NULL);
+ tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
+ bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
+ gcc_assert (ok);
+ pop_cfun ();
+}
+
+/* If SIMD clone NODE can't be used in a vectorized loop
+ in current function, return -1, otherwise return a badness of using it
+ (0 if it is most desirable from vecsize_mangle point of view, 1
+ slightly less desirable, etc.). */
+
+static int
+ix86_simd_clone_usable (struct cgraph_node *node)
+{
+ switch (node->simdclone->vecsize_mangle)
+ {
+ case 'x':
+ if (!TARGET_SSE2)
+ return -1;
+ if (!TARGET_AVX)
+ return 0;
+ return TARGET_AVX2 ? 2 : 1;
+ case 'y':
+ if (!TARGET_AVX)
+ return -1;
+ return TARGET_AVX2 ? 1 : 0;
+ break;
+ case 'Y':
+ if (!TARGET_AVX2)
+ return -1;
return 0;
+ default:
+ gcc_unreachable ();
}
}
@@ -43783,13 +43911,17 @@ ix86_cilkplus_vecsize_for_mangle (char m
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
-#undef TARGET_CILKPLUS_DEFAULT_VECSIZE_MANGLE
-#define TARGET_CILKPLUS_DEFAULT_VECSIZE_MANGLE \
- ix86_cilkplus_default_vecsize_mangle
-
-#undef TARGET_CILKPLUS_VECSIZE_FOR_MANGLE
-#define TARGET_CILKPLUS_VECSIZE_FOR_MANGLE \
- ix86_cilkplus_vecsize_for_mangle
+#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
+#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
+ ix86_simd_clone_compute_vecsize_and_simdlen
+
+#undef TARGET_SIMD_CLONE_ADJUST
+#define TARGET_SIMD_CLONE_ADJUST \
+ ix86_simd_clone_adjust
+
+#undef TARGET_SIMD_CLONE_USABLE
+#define TARGET_SIMD_CLONE_USABLE \
+ ix86_simd_clone_usable
struct gcc_target targetm = TARGET_INITIALIZER;
@@ -11214,39 +11214,41 @@ argno_map::argno_map (tree fndecl)
/* Allocate a fresh `simd_clone' and return it. NARGS is the number
of arguments to reserve space for. */
-static struct simd_clone *
+static struct cgraph_simd_clone *
simd_clone_struct_alloc (int nargs)
{
- struct simd_clone *clone_info;
- size_t len = (sizeof (struct simd_clone)
- + nargs * sizeof (struct simd_clone_arg));
- clone_info = ggc_alloc_cleared_simd_clone_stat (len PASS_MEM_STAT);
+ struct cgraph_simd_clone *clone_info;
+ size_t len = (sizeof (struct cgraph_simd_clone)
+ + nargs * sizeof (struct cgraph_simd_clone_arg));
+ clone_info = (struct cgraph_simd_clone *)
+ ggc_internal_cleared_alloc_stat (len PASS_MEM_STAT);
return clone_info;
}
-/* Make a copy of the `struct simd_clone' in FROM to TO. */
+/* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */
static inline void
-simd_clone_struct_copy (struct simd_clone *to, struct simd_clone *from)
+simd_clone_struct_copy (struct cgraph_simd_clone *to,
+ struct cgraph_simd_clone *from)
{
- memcpy (to, from, (sizeof (struct simd_clone)
- + from->nargs * sizeof (struct simd_clone_arg)));
+ memcpy (to, from, (sizeof (struct cgraph_simd_clone)
+ + from->nargs * sizeof (struct cgraph_simd_clone_arg)));
}
-/* Given a simd clone in NEW_NODE, extract the simd specific
- information from the OMP clauses passed in CLAUSES, and set the
- relevant bits in the cgraph node. *INBRANCH_SPECIFIED is set to
- TRUE if the `inbranch' or `notinbranch' clause specified, otherwise
- set to FALSE. */
+/* Given a simd function in NODE, extract the simd specific
+ information from the OMP clauses passed in CLAUSES, and return
+ the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED
+ is set to TRUE if the `inbranch' or `notinbranch' clause specified,
+ otherwise set to FALSE. */
-static void
-simd_clone_clauses_extract (struct cgraph_node *new_node, tree clauses,
+static struct cgraph_simd_clone *
+simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
bool *inbranch_specified)
{
tree t;
int n = 0;
*inbranch_specified = false;
- for (t = DECL_ARGUMENTS (new_node->decl); t; t = DECL_CHAIN (t))
+ for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
++n;
/* To distinguish from an OpenMP simd clone, Cilk Plus functions to
@@ -11255,22 +11257,21 @@ simd_clone_clauses_extract (struct cgrap
bool cilk_clone
= (flag_enable_cilkplus
&& lookup_attribute ("cilk plus elemental",
- DECL_ATTRIBUTES (new_node->decl)));
+ DECL_ATTRIBUTES (node->decl)));
/* Allocate one more than needed just in case this is an in-branch
clone which will require a mask argument. */
- struct simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
+ struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
clone_info->nargs = n;
clone_info->cilk_elemental = cilk_clone;
- gcc_assert (!new_node->simdclone);
- new_node->simdclone = clone_info;
if (!clauses)
- return;
+ return clone_info;
clauses = TREE_VALUE (clauses);
if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
- return;
+ return clone_info;
+ argno_map args (node->decl);
for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
{
switch (OMP_CLAUSE_CODE (t))
@@ -11303,12 +11304,20 @@ simd_clone_clauses_extract (struct cgrap
}
else
{
+ if (POINTER_TYPE_P (TREE_TYPE (args[argno])))
+ step = fold_convert (ssizetype, step);
if (!host_integerp (step, 0))
- warning_at (OMP_CLAUSE_LOCATION (t), 0,
- "ignoring large linear step");
+ {
+ warning_at (OMP_CLAUSE_LOCATION (t), 0,
+ "ignoring large linear step");
+ return NULL;
+ }
else if (integer_zerop (step))
- warning_at (OMP_CLAUSE_LOCATION (t), 0,
- "ignoring zero linear step");
+ {
+ warning_at (OMP_CLAUSE_LOCATION (t), 0,
+ "ignoring zero linear step");
+ return NULL;
+ }
else
{
clone_info->args[argno].arg_type
@@ -11339,39 +11348,19 @@ simd_clone_clauses_extract (struct cgrap
break;
}
}
+ return clone_info;
}
-/* Helper function for mangling vectors. Given a vector size in bits,
- return the corresponding mangling character. */
-
-static char
-vecsize_mangle (unsigned int vecsize)
-{
- switch (vecsize)
- {
- /* The Intel Vector ABI does not provide a mangling character
- for a 64-bit ISA, but this feels like it's keeping with the
- design. */
- case 64: return 'w';
-
- case 128: return 'x';
- case 256: return 'y';
- case 512: return 'z';
- default:
- /* FIXME: We must come up with a default mangling bit. */
- return 'x';
- }
-}
-
-/* Given a SIMD clone in NEW_NODE, calculate the characteristic data
+/* Given a SIMD clone in NODE, calculate the characteristic data
type and return the coresponding type. The characteristic data
type is computed as described in the Intel Vector ABI. */
static tree
-simd_clone_compute_base_data_type (struct cgraph_node *new_node)
+simd_clone_compute_base_data_type (struct cgraph_node *node,
+ struct cgraph_simd_clone *clone_info)
{
tree type = integer_type_node;
- tree fndecl = new_node->decl;
+ tree fndecl = node->decl;
/* a) For non-void function, the characteristic data type is the
return type. */
@@ -11384,9 +11373,8 @@ simd_clone_compute_base_data_type (struc
else
{
argno_map map (fndecl);
- for (unsigned int i = 0; i < new_node->simdclone->nargs; ++i)
- if (new_node->simdclone->args[i].arg_type
- == SIMD_CLONE_ARG_TYPE_VECTOR)
+ for (unsigned int i = 0; i < clone_info->nargs; ++i)
+ if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
{
type = TREE_TYPE (map[i]);
break;
@@ -11413,55 +11401,13 @@ simd_clone_compute_base_data_type (struc
/* Well, we don't handle Xeon Phi yet. */
}
-/* Given a SIMD clone in NEW_NODE, compute simdlen and vector size,
- and store them in NEW_NODE->simdclone. */
-
-static void
-simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *new_node)
-{
- char vmangle = new_node->simdclone->vecsize_mangle;
- /* Vector size for this clone. */
- unsigned int vecsize = 0;
- /* Base vector type, based on function arguments. */
- tree base_type = simd_clone_compute_base_data_type (new_node);
- unsigned int base_type_size = GET_MODE_BITSIZE (TYPE_MODE (base_type));
-
- /* Calculate everything for Cilk Plus clones with appropriate target
- support. This is as specified in the Intel Vector ABI.
-
- Note: Any target which supports the Cilk Plus processor clause
- must also provide appropriate target hooks for calculating
- default ISA/processor (default_vecsize_mangle), and for
- calculating hardware vector size based on ISA/processor
- (vecsize_for_mangle). */
- if (new_node->simdclone->cilk_elemental
- && targetm.cilkplus.default_vecsize_mangle)
- {
- if (!vmangle)
- vmangle = targetm.cilkplus.default_vecsize_mangle (new_node);
- vecsize = targetm.cilkplus.vecsize_for_mangle (vmangle);
- if (!new_node->simdclone->simdlen)
- new_node->simdclone->simdlen = vecsize / base_type_size;
- }
- /* Calculate everything else generically. */
- else
- {
- vecsize = GET_MODE_BITSIZE (targetm.vectorize.preferred_simd_mode
- (TYPE_MODE (base_type)));
- vmangle = vecsize_mangle (vecsize);
- if (!new_node->simdclone->simdlen)
- new_node->simdclone->simdlen = vecsize / base_type_size;
- }
- new_node->simdclone->vecsize_mangle = vmangle;
- new_node->simdclone->hw_vector_size = vecsize;
-}
-
-static void
-simd_clone_mangle (struct cgraph_node *old_node, struct cgraph_node *new_node)
+static tree
+simd_clone_mangle (struct cgraph_node *node,
+ struct cgraph_simd_clone *clone_info)
{
- char vecsize_mangle = new_node->simdclone->vecsize_mangle;
- char mask = new_node->simdclone->inbranch ? 'M' : 'N';
- unsigned int simdlen = new_node->simdclone->simdlen;
+ char vecsize_mangle = clone_info->vecsize_mangle;
+ char mask = clone_info->inbranch ? 'M' : 'N';
+ unsigned int simdlen = clone_info->simdlen;
unsigned int n;
pretty_printer pp;
@@ -11472,9 +11418,9 @@ simd_clone_mangle (struct cgraph_node *o
pp_character (&pp, mask);
pp_decimal_int (&pp, simdlen);
- for (n = 0; n < new_node->simdclone->nargs; ++n)
+ for (n = 0; n < clone_info->nargs; ++n)
{
- struct simd_clone_arg arg = new_node->simdclone->args[n];
+ struct cgraph_simd_clone_arg arg = clone_info->args[n];
if (arg.arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
pp_character (&pp, 'u');
@@ -11507,10 +11453,22 @@ simd_clone_mangle (struct cgraph_node *o
pp_underscore (&pp);
pp_string (&pp,
- IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (old_node->decl)));
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
const char *str = pp_formatted_text (&pp);
- change_decl_assembler_name (new_node->decl,
- get_identifier (str));
+
+ /* If there already is a SIMD clone with the same mangled name, don't
+ add another one. This can happen e.g. for
+ #pragma omp declare simd
+ #pragma omp declare simd simdlen(8)
+ int foo (int, int);
+ if the simdlen is assumed to be 8 for the first one, etc. */
+ for (struct cgraph_node *clone = node->simd_clones; clone;
+ clone = clone->simdclone->next_clone)
+ if (strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (clone->decl)),
+ str) == 0)
+ return NULL_TREE;
+
+ return get_identifier (str);
}
/* Create a simd clone of OLD_NODE and return it. */
@@ -11521,16 +11479,12 @@ simd_clone_create (struct cgraph_node *o
struct cgraph_node *new_node;
new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL, false,
NULL, NULL, "simdclone");
+ if (new_node == NULL)
+ return new_node;
- new_node->simdclone_of = old_node;
-
- /* Keep cgraph friends from removing the clone. */
- new_node->externally_visible
- = old_node->externally_visible;
TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
- old_node->has_simd_clones = true;
- /* The function cgraph_function_versioning() will force the new
+ /* The function cgraph_function_versioning () will force the new
symbol local. Undo this, and inherit external visability from
the old node. */
new_node->local.local = old_node->local.local;
@@ -11620,11 +11574,10 @@ simd_clone_adjust_argument_types (struct
ipa_parm_adjustment_vec adjustments;
adjustments.create (args.length ());
- unsigned i;
+ unsigned i, j, veclen;
+ struct ipa_parm_adjustment adj;
for (i = 0; i < node->simdclone->nargs; ++i)
{
- struct ipa_parm_adjustment adj;
-
memset (&adj, 0, sizeof (adj));
tree parm = args[i];
adj.base_index = i;
@@ -11639,40 +11592,72 @@ simd_clone_adjust_argument_types (struct
}
else
{
- adj.simdlen = node->simdclone->simdlen;
+ if (INTEGRAL_TYPE_P (TREE_TYPE (parm))
+ || POINTER_TYPE_P (TREE_TYPE (parm)))
+ veclen = node->simdclone->vecsize_int;
+ else
+ veclen = node->simdclone->vecsize_float;
+ veclen /= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (parm)));
+ if (veclen > node->simdclone->simdlen)
+ veclen = node->simdclone->simdlen;
+ adj.simdlen = veclen;
+ adj.arg_prefix = "simd";
if (POINTER_TYPE_P (TREE_TYPE (parm)))
adj.by_ref = 1;
adj.type = TREE_TYPE (parm);
+ for (j = veclen; j < node->simdclone->simdlen; j += veclen)
+ {
+ adjustments.safe_push (adj);
+ if (j == veclen)
+ {
+ memset (&adj, 0, sizeof (adj));
+ adj.op = IPA_PARM_OP_NEW;
+ adj.arg_prefix = "simd";
+ adj.base_index = i;
+ adj.type = build_vector_type (TREE_TYPE (parm), veclen);
+ }
+ }
node->simdclone->args[i].simd_array
= create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
TREE_TYPE (parm),
node->simdclone->simdlen);
}
- adj.arg_prefix = "simd";
- adjustments.quick_push (adj);
+ adjustments.safe_push (adj);
}
if (node->simdclone->inbranch)
{
- struct ipa_parm_adjustment adj;
+ tree base_type
+ = simd_clone_compute_base_data_type (node->simdclone->origin,
+ node->simdclone);
memset (&adj, 0, sizeof (adj));
adj.op = IPA_PARM_OP_NEW;
adj.arg_prefix = "mask";
+
adj.base_index = i;
- adj.type
- = build_vector_type (integer_type_node, node->simdclone->simdlen);
+ if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
+ veclen = node->simdclone->vecsize_int;
+ else
+ veclen = node->simdclone->vecsize_float;
+ if (veclen > node->simdclone->simdlen)
+ veclen = node->simdclone->simdlen;
+ adj.type = build_vector_type (base_type, veclen);
adjustments.safe_push (adj);
+ for (j = veclen; j < node->simdclone->simdlen; j += veclen)
+ adjustments.safe_push (adj);
+
/* We have previously allocated one extra entry for the mask. Use
it and fill it. */
- struct simd_clone *sc = node->simdclone;
+ struct cgraph_simd_clone *sc = node->simdclone;
sc->nargs++;
sc->args[i].orig_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL,
- integer_type_node);
+ base_type);
sc->args[i].simd_array
- = create_tmp_simd_array ("mask", integer_type_node, sc->simdlen);
+ = create_tmp_simd_array ("mask", base_type, sc->simdlen);
+ sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
}
ipa_modify_formal_parameters (node->decl, adjustments);
@@ -11688,21 +11673,50 @@ simd_clone_init_simd_arrays (struct cgra
ipa_parm_adjustment_vec adjustments)
{
gimple_seq seq = NULL;
- unsigned i = 0;
+ unsigned i = 0, j = 0, k;
for (tree arg = DECL_ARGUMENTS (node->decl);
arg;
- arg = DECL_CHAIN (arg), i++)
+ arg = DECL_CHAIN (arg), i++, j++)
{
- if (adjustments[i].op == IPA_PARM_OP_COPY)
+ if (adjustments[j].op == IPA_PARM_OP_COPY)
continue;
node->simdclone->args[i].vector_arg = arg;
tree array = node->simdclone->args[i].simd_array;
- tree t = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (array), arg);
- t = build2 (MODIFY_EXPR, TREE_TYPE (array), array, t);
- gimplify_and_add (t, &seq);
+ if ((unsigned) adjustments[j].simdlen == node->simdclone->simdlen)
+ {
+ tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
+ tree ptr = build_fold_addr_expr (array);
+ tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
+ build_int_cst (ptype, 0));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
+ gimplify_and_add (t, &seq);
+ }
+ else
+ {
+ unsigned int simdlen = adjustments[j].simdlen;
+ if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
+ simdlen = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg));
+ tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
+ for (k = 0; k < node->simdclone->simdlen; k += simdlen)
+ {
+ tree ptr = build_fold_addr_expr (array);
+ int elemsize;
+ if (k)
+ {
+ arg = DECL_CHAIN (arg);
+ j++;
+ }
+ elemsize
+ = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (arg))));
+ tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
+ build_int_cst (ptype, k * elemsize));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
+ gimplify_and_add (t, &seq);
+ }
+ }
}
return seq;
}
@@ -11793,22 +11807,26 @@ ipa_simd_modify_function_body (struct cg
tree retval_array, tree iter)
{
basic_block bb;
+ unsigned int i, j;
/* Re-use the adjustments array, but this time use it to replace
every function argument use to an offset into the corresponding
simd_array. */
- for (unsigned i = 0; i < node->simdclone->nargs; ++i)
+ for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j)
{
if (!node->simdclone->args[i].vector_arg)
continue;
tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg);
- adjustments[i].new_decl
+ adjustments[j].new_decl
= build4 (ARRAY_REF,
basetype,
node->simdclone->args[i].simd_array,
iter,
NULL_TREE, NULL_TREE);
+ if (adjustments[j].op == IPA_PARM_OP_NONE
+ && (unsigned) adjustments[j].simdlen < node->simdclone->simdlen)
+ j += node->simdclone->simdlen / adjustments[j].simdlen - 1;
}
struct modify_stmt_info info;
@@ -11885,14 +11903,15 @@ simd_clone_adjust (struct cgraph_node *n
// 2. Handle linear/uniform arguments in get_simd_clone/etc.
// 3. Bail on non-SLP vectorizer mode.
- // FIXME: __attribute__((target (something))) if needed
-
// FIXME: get_simd_clone() needs optimization.
push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+ targetm.simd_clone.adjust (node);
+
tree retval = simd_clone_adjust_return_type (node);
- ipa_parm_adjustment_vec adjustments = simd_clone_adjust_argument_types (node);
+ ipa_parm_adjustment_vec adjustments
+ = simd_clone_adjust_argument_types (node);
struct gimplify_ctx gctx;
push_gimplify_context (&gctx);
@@ -11955,15 +11974,24 @@ simd_clone_adjust (struct cgraph_node *n
gimple_stmt_iterator gsi = gsi_last_bb (loop->header);
tree mask_array
= node->simdclone->args[node->simdclone->nargs - 1].simd_array;
- tree mask = create_tmp_var (integer_type_node, NULL);
+ tree mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)), NULL);
tree aref = build4 (ARRAY_REF,
- integer_type_node,
+ TREE_TYPE (TREE_TYPE (mask_array)),
mask_array, iter,
NULL, NULL);
g = gimple_build_assign (mask, aref);
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+ int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref)));
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
+ {
+ aref = build1 (VIEW_CONVERT_EXPR,
+ build_nonstandard_integer_type (bitsize, 0), mask);
+ mask = make_ssa_name (TREE_TYPE (TREE_TYPE (aref)), NULL);
+ g = gimple_build_assign (mask, aref);
+ gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+ }
- g = gimple_build_cond (EQ_EXPR, mask, integer_zero_node,
+ g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),
NULL, NULL);
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE);
@@ -12021,32 +12049,69 @@ expand_simd_clones (struct cgraph_node *
tree attr = lookup_attribute ("omp declare simd",
DECL_ATTRIBUTES (node->decl));
- if (!attr)
+ if (!attr || targetm.simd_clone.compute_vecsize_and_simdlen == NULL)
return;
do
{
- struct cgraph_node *new_node = simd_clone_create (node);
-
bool inbranch_clause_specified;
- simd_clone_clauses_extract (new_node, TREE_VALUE (attr),
- &inbranch_clause_specified);
- simd_clone_compute_vecsize_and_simdlen (new_node);
- simd_clone_mangle (node, new_node);
- simd_clone_adjust (new_node);
-
- /* If no inbranch clause was specified, we need both variants.
- We have already created the not-in-branch version above, by
- virtue of .inbranch being clear. Create the masked in-branch
- version. */
- if (!inbranch_clause_specified)
+ struct cgraph_simd_clone *clone_info
+ = simd_clone_clauses_extract (node, TREE_VALUE (attr),
+ &inbranch_clause_specified);
+ if (clone_info == NULL)
+ continue;
+
+ int orig_simdlen = clone_info->simdlen;
+ tree base_type = simd_clone_compute_base_data_type (node, clone_info);
+ int count
+ = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
+ base_type, 0);
+ if (count == 0)
+ continue;
+
+ for (int i = 0; i < count * 2; i++)
{
+ struct cgraph_simd_clone *clone = clone_info;
+ if (inbranch_clause_specified && (i & 1) != 0)
+ continue;
+
+ if (i != 0)
+ {
+ clone = simd_clone_struct_alloc (clone_info->nargs
+ - clone_info->inbranch
+ + ((i & 1) != 0));
+ simd_clone_struct_copy (clone, clone_info);
+ clone->nargs -= clone_info->inbranch;
+ clone->simdlen = orig_simdlen;
+ targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
+ base_type,
+ i / 2);
+ if ((i & 1) != 0)
+ clone->inbranch = 1;
+ }
+
+ tree id = simd_clone_mangle (node, clone);
+ if (id == NULL_TREE)
+ continue;
+
struct cgraph_node *n = simd_clone_create (node);
- struct simd_clone *clone
- = simd_clone_struct_alloc (new_node->simdclone->nargs);
- simd_clone_struct_copy (clone, new_node->simdclone);
- clone->inbranch = 1;
+ if (n == NULL)
+ continue;
+
n->simdclone = clone;
- simd_clone_mangle (node, n);
+ clone->origin = node;
+ clone->next_clone = NULL;
+ if (node->simd_clones == NULL)
+ {
+ clone->prev_clone = n;
+ node->simd_clones = n;
+ }
+ else
+ {
+ clone->prev_clone = node->simd_clones->simdclone->prev_clone;
+ clone->prev_clone->simdclone->next_clone = n;
+ node->simd_clones->simdclone->prev_clone = n;
+ }
+ change_decl_assembler_name (n->decl, id);
simd_clone_adjust (n);
}
}
@@ -3633,8 +3633,9 @@ c_builtin_function_ext_scope (tree decl)
const char *name = IDENTIFIER_POINTER (id);
C_DECL_BUILTIN_PROTOTYPE (decl) = prototype_p (type);
- bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false,
- UNKNOWN_LOCATION);
+ if (external_scope)
+ bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false,
+ UNKNOWN_LOCATION);
/* Builtins in the implementation namespace are made visible without
needing to be explicitly declared. See push_file_scope. */
@@ -429,7 +429,7 @@ determine_versionability (struct cgraph_
reason = "not a tree_versionable_function";
else if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
reason = "insufficient body availability";
- else if (node->has_simd_clones)
+ else if (node->simd_clones != NULL)
{
/* Ideally we should clone the SIMD clones themselves and create
vector copies of them, so IPA-cp and SIMD clones can happily
@@ -3096,29 +3096,4 @@ gimple_check_call_matching_types (gimple
return true;
}
-/* Given a NODE, return a compatible SIMD clone returning `vectype'.
- If none found, NULL is returned. */
-
-struct cgraph_node *
-get_simd_clone (struct cgraph_node *node, tree vectype)
-{
- if (!node->has_simd_clones)
- return NULL;
-
- /* FIXME: What to do with linear/uniform arguments. */
-
- /* FIXME: Nasty kludge until we figure out where to put the clone
- list-- perhaps, next_sibling_clone/prev_sibling_clone in
- cgraph_node ??. */
- struct cgraph_node *t;
- FOR_EACH_FUNCTION (t)
- if (t->simdclone_of == node
- /* No inbranch vectorization for now. */
- && !t->simdclone->inbranch
- && types_compatible_p (TREE_TYPE (TREE_TYPE (t->decl)),
- vectype))
- break;
- return t;
-}
-
#include "gt-cgraph.h"
@@ -1508,34 +1508,35 @@ hook_int_uint_mode_1)
HOOK_VECTOR_END (sched)
-/* Functions relating to Cilk Plus. */
+/* Functions relating to OpenMP and Cilk Plus SIMD clones. */
#undef HOOK_PREFIX
-#define HOOK_PREFIX "TARGET_CILKPLUS_"
-HOOK_VECTOR (TARGET_CILKPLUS, cilkplus)
+#define HOOK_PREFIX "TARGET_SIMD_CLONE_"
+HOOK_VECTOR (TARGET_SIMD_CLONE, simd_clone)
DEFHOOK
-(default_vecsize_mangle,
-"This hook should return the default mangling character when no vector\n\
-size can be determined by examining the Cilk Plus @code{processor} clause.\n\
-This is as specified in the Intel Vector ABI document.\n\
-\n\
-This hook, as well as @code{max_vector_size_for_isa} below must be set\n\
-to support the Cilk Plus @code{processor} clause.\n\
-\n\
-The only argument is a @var{cgraph_node} containing the clone.",
-char, (struct cgraph_node *), NULL)
+(compute_vecsize_and_simdlen,
+"This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}\n\
+fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also\n\
+@var{simdlen} field if it was previously 0.\n\
+The hook should return 0 if SIMD clones shouldn't be emitted,\n\
+or number of @var{vecsize_mangle} variants that should be emitted.",
+int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int), NULL)
DEFHOOK
-(vecsize_for_mangle,
-"This hook returns the maximum hardware vector size in bits for a given\n\
-mangling character. The character is as described in Intel's\n\
-Vector ABI (see @var{ISA} character in the section on mangling).\n\
-\n\
-This hook must be defined in order to support the Cilk Plus @code{processor}\n\
-clause.",
-unsigned int, (char), NULL)
+(adjust,
+"This hook should add implicit @code{attribute(target(\"...\"))} attribute\n\
+to SIMD clone @var{node} if needed.",
+void, (struct cgraph_node *), NULL)
-HOOK_VECTOR_END (cilkplus)
+DEFHOOK
+(usable,
+"This hook should return -1 if SIMD clone @var{node} shouldn't be used\n\
+in vectorized loops in current function, or non-negative number if it is\n\
+usable. In that case, the smaller the number is, the more desirable it is\n\
+to use it.",
+int, (struct cgraph_node *), NULL)
+
+HOOK_VECTOR_END (simd_clone)
/* Functions relating to vectorization. */
#undef HOOK_PREFIX
@@ -2988,7 +2988,7 @@ vect_analyze_data_refs (loop_vec_info lo
if (fndecl != NULL_TREE)
{
struct cgraph_node *node = cgraph_get_node (fndecl);
- if (node != NULL && node->has_simd_clones)
+ if (node != NULL && node->simd_clones != NULL)
{
unsigned int j, n = gimple_call_num_args (stmt);
for (j = 0; j < n; j++)
@@ -92,6 +92,8 @@ extern bool target_default_pointer_addre
struct stdarg_info;
struct spec_info_def;
struct hard_reg_set_container;
+struct cgraph_node;
+struct cgraph_simd_clone;
/* The struct used by the secondary_reload target hook. */
typedef struct secondary_reload_info
@@ -4420,9 +4420,11 @@ address; but often a machine-dependent
@hook TARGET_VECTORIZE_BUILTIN_GATHER
-@hook TARGET_CILKPLUS_DEFAULT_VECSIZE_MANGLE
+@hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
-@hook TARGET_CILKPLUS_VECSIZE_FOR_MANGLE
+@hook TARGET_SIMD_CLONE_ADJUST
+
+@hook TARGET_SIMD_CLONE_USABLE
@node Anchored Addresses
@section Anchored Addresses
@@ -5814,24 +5814,24 @@ The default is @code{NULL_TREE} which me
loads.
@end deftypefn
-@deftypefn {Target Hook} char TARGET_CILKPLUS_DEFAULT_VECSIZE_MANGLE (struct cgraph_node *@var{})
-This hook should return the default mangling character when no vector
-size can be determined by examining the Cilk Plus @code{processor} clause.
-This is as specified in the Intel Vector ABI document.
-
-This hook, as well as @code{max_vector_size_for_isa} below must be set
-to support the Cilk Plus @code{processor} clause.
-
-The only argument is a @var{cgraph_node} containing the clone.
+@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int})
+This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
+fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
+@var{simdlen} field if it was previously 0.
+The hook should return 0 if SIMD clones shouldn't be emitted,
+or number of @var{vecsize_mangle} variants that should be emitted.
@end deftypefn
-@deftypefn {Target Hook} {unsigned int} TARGET_CILKPLUS_VECSIZE_FOR_MANGLE (char)
-This hook returns the maximum hardware vector size in bits for a given
-mangling character. The character is as described in Intel's
-Vector ABI (see @var{ISA} character in the section on mangling).
+@deftypefn {Target Hook} void TARGET_SIMD_CLONE_ADJUST (struct cgraph_node *@var{})
+This hook should add implicit @code{attribute(target("..."))} attribute
+to SIMD clone @var{node} if needed.
+@end deftypefn
-This hook must be defined in order to support the Cilk Plus @code{processor}
-clause.
+@deftypefn {Target Hook} int TARGET_SIMD_CLONE_USABLE (struct cgraph_node *@var{})
+This hook should return -1 if SIMD clone @var{node} shouldn't be used
+in vectorized loops in current function, or non-negative number if it is
+usable. In that case, the smaller the number is, the more desirable it is
+to use it.
@end deftypefn
@node Anchored Addresses
@@ -250,18 +250,19 @@ struct GTY(()) cgraph_clone_info
bitmap combined_args_to_skip;
};
-enum simd_clone_arg_type
+enum cgraph_simd_clone_arg_type
{
SIMD_CLONE_ARG_TYPE_VECTOR,
SIMD_CLONE_ARG_TYPE_UNIFORM,
SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP,
- SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
+ SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP,
+ SIMD_CLONE_ARG_TYPE_MASK
};
/* Function arguments in the original function of a SIMD clone.
Supplementary data for `struct simd_clone'. */
-struct GTY(()) simd_clone_arg {
+struct GTY(()) cgraph_simd_clone_arg {
/* Original function argument as it orignally existed in
DECL_ARGUMENTS. */
tree orig_arg;
@@ -285,7 +286,7 @@ struct GTY(()) simd_clone_arg {
/* A SIMD clone's argument can be either linear (constant or
variable), uniform, or vector. */
- enum simd_clone_arg_type arg_type;
+ enum cgraph_simd_clone_arg_type arg_type;
/* For arg_type SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP this is
the constant linear step, if arg_type is
@@ -299,7 +300,7 @@ struct GTY(()) simd_clone_arg {
/* Specific data for a SIMD function clone. */
-struct GTY(()) simd_clone {
+struct GTY(()) cgraph_simd_clone {
/* Number of words in the SIMD lane associated with this clone. */
unsigned int simdlen;
@@ -307,8 +308,11 @@ struct GTY(()) simd_clone {
usually the number of named arguments in FNDECL. */
unsigned int nargs;
- /* Max hardware vector size in bits. */
- unsigned int hw_vector_size;
+ /* Max hardware vector size in bits for integral vectors. */
+ unsigned int vecsize_int;
+
+ /* Max hardware vector size in bits for floating point vectors. */
+ unsigned int vecsize_float;
/* The mangling character for a given vector size. This is is used
to determine the ISA mangling bit as specified in the Intel
@@ -322,8 +326,14 @@ struct GTY(()) simd_clone {
/* True if this is a Cilk Plus variant. */
unsigned int cilk_elemental : 1;
+ /* Doubly linked list of SIMD clones. */
+ struct cgraph_node *prev_clone, *next_clone;
+
+ /* Original cgraph node the SIMD clones were created for. */
+ struct cgraph_node *origin;
+
/* Annotated function arguments for the original function. */
- struct simd_clone_arg GTY((length ("%h.nargs"))) args[1];
+ struct cgraph_simd_clone_arg GTY((length ("%h.nargs"))) args[1];
};
@@ -356,11 +366,9 @@ public:
/* If this is a SIMD clone, this points to the SIMD specific
information for it. */
- struct simd_clone *simdclone;
-
- /* If this is a SIMD clone, this points to the original scalar
- function. */
- struct cgraph_node *simdclone_of;
+ struct cgraph_simd_clone *simdclone;
+ /* If this function has SIMD clones, this points to the first clone. */
+ struct cgraph_node *simd_clones;
/* Interprocedural passes scheduled to have their transform functions
applied next time we execute local pass on them. We maintain it
@@ -403,8 +411,6 @@ public:
/* ?? We should be able to remove this. We have enough bits in
cgraph to calculate it. */
unsigned tm_clone : 1;
- /* True if this function has SIMD clones. */
- unsigned has_simd_clones : 1;
/* True if this decl is a dispatcher for function versions. */
unsigned dispatcher_function : 1;
};
@@ -814,7 +820,6 @@ void cgraph_speculative_call_info (struc
struct cgraph_edge *&,
struct ipa_ref *&);
extern bool gimple_check_call_matching_types (gimple, tree, bool);
-struct cgraph_node *get_simd_clone (struct cgraph_node *, tree);
/* In cgraphunit.c */
struct asm_node *add_asm_node (tree);
@@ -2184,7 +2184,7 @@ vectorizable_simd_clone_call (gimple stm
return false;
struct cgraph_node *node = cgraph_get_node (fndecl);
- if (node == NULL || !node->has_simd_clones)
+ if (node == NULL || node->simd_clones == NULL)
return false;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
@@ -2256,70 +2256,72 @@ vectorizable_simd_clone_call (gimple stm
}
unsigned int badness = 0;
- /* FIXME: Nasty kludge until we figure out where to put the clone
- list-- perhaps, next_sibling_clone/prev_sibling_clone in
- cgraph_node ??. */
- struct cgraph_node *bestn = NULL, *n;
- FOR_EACH_FUNCTION (n)
- if (n->simdclone_of == node)
- {
- unsigned int this_badness = 0;
- if (n->simdclone->simdlen
- > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- || n->simdclone->nargs != nargs)
- continue;
- if (n->simdclone->simdlen
- < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
- this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
- - exact_log2 (n->simdclone->simdlen)) * 1024;
- if (n->simdclone->inbranch)
- this_badness += 2048;
- /* FORNOW: Have to add code to add the mask argument. */
- if (n->simdclone->inbranch)
- continue;
- for (i = 0; i < nargs; i++)
- {
- switch (n->simdclone->args[i].arg_type)
- {
- case SIMD_CLONE_ARG_TYPE_VECTOR:
- if (arginfo[i].vectype == NULL_TREE
- || arginfo[i].linear_step)
- this_badness += 64;
- break;
- case SIMD_CLONE_ARG_TYPE_UNIFORM:
- if (arginfo[i].vectype != NULL_TREE)
- i = -1;
- break;
- case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
- if (arginfo[i].vectype == NULL_TREE
- || (arginfo[i].linear_step
- != n->simdclone->args[i].linear_step))
- i = -1;
- break;
- case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
- /* FORNOW */
+ struct cgraph_node *bestn = NULL;
+ for (struct cgraph_node *n = node->simd_clones; n != NULL;
+ n = n->simdclone->next_clone)
+ {
+ unsigned int this_badness = 0;
+ if (n->simdclone->simdlen
+ > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ || n->simdclone->nargs != nargs)
+ continue;
+ if (n->simdclone->simdlen
+ < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+ this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+ - exact_log2 (n->simdclone->simdlen)) * 1024;
+ if (n->simdclone->inbranch)
+ this_badness += 2048;
+ int target_badness = targetm.simd_clone.usable (n);
+ if (target_badness < 0)
+ continue;
+ this_badness += target_badness * 512;
+ /* FORNOW: Have to add code to add the mask argument. */
+ if (n->simdclone->inbranch)
+ continue;
+ for (i = 0; i < nargs; i++)
+ {
+ switch (n->simdclone->args[i].arg_type)
+ {
+ case SIMD_CLONE_ARG_TYPE_VECTOR:
+ if (arginfo[i].vectype == NULL_TREE || arginfo[i].linear_step)
+ this_badness += 64;
+ break;
+ case SIMD_CLONE_ARG_TYPE_UNIFORM:
+ if (arginfo[i].vectype != NULL_TREE)
i = -1;
- break;
- }
- if (i == (size_t) -1)
break;
- if (n->simdclone->args[i].alignment > arginfo[i].align)
- {
+ case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
+ if (arginfo[i].vectype == NULL_TREE
+ || (arginfo[i].linear_step
+ != n->simdclone->args[i].linear_step))
i = -1;
- break;
- }
- if (arginfo[i].align)
- this_badness += (exact_log2 (arginfo[i].align)
- - exact_log2 (n->simdclone->args[i].alignment));
- }
- if (i == (size_t) -1)
- continue;
- if (bestn == NULL || this_badness < badness)
- {
- bestn = n;
- badness = this_badness;
- }
- }
+ break;
+ case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
+ /* FORNOW */
+ i = -1;
+ break;
+ case SIMD_CLONE_ARG_TYPE_MASK:
+ gcc_unreachable ();
+ }
+ if (i == (size_t) -1)
+ break;
+ if (n->simdclone->args[i].alignment > arginfo[i].align)
+ {
+ i = -1;
+ break;
+ }
+ if (arginfo[i].align)
+ this_badness += (exact_log2 (arginfo[i].align)
+ - exact_log2 (n->simdclone->args[i].alignment));
+ }
+ if (i == (size_t) -1)
+ continue;
+ if (bestn == NULL || this_badness < badness)
+ {
+ bestn = n;
+ badness = this_badness;
+ }
+ }
if (bestn == NULL)
{
@@ -2334,7 +2336,8 @@ vectorizable_simd_clone_call (gimple stm
/* If the function isn't const, only allow it in simd loops where user
has asserted that at least nunits consecutive iterations can be
performed using SIMD instructions. */
- if ((loop == NULL || loop->safelen < nunits) && gimple_vuse (stmt))
+ if ((loop == NULL || (unsigned) loop->safelen < nunits)
+ && gimple_vuse (stmt))
{
arginfo.release ();
return false;
@@ -2381,81 +2384,90 @@ vectorizable_simd_clone_call (gimple stm
for (i = 0; i < nargs; i++)
{
- unsigned int k, l;
+ unsigned int k, l, m, o;
tree atype;
op = gimple_call_arg (stmt, i);
switch (bestn->simdclone->args[i].arg_type)
{
case SIMD_CLONE_ARG_TYPE_VECTOR:
- /* FIXME */
atype = TREE_TYPE (bestn->simdclone->args[i].vector_arg);
- gcc_assert (TYPE_VECTOR_SUBPARTS (atype) == nunits);
- if (nunits < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
+ o = nunits / TYPE_VECTOR_SUBPARTS (atype);
+ for (m = j * o; m < (j + 1) * o; m++)
{
- unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
- k = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype) / nunits;
- gcc_assert ((k & (k - 1)) == 0);
- if (j == 0)
- vec_oprnd0
- = vect_get_vec_def_for_operand (op, stmt, NULL);
- else
+ if (TYPE_VECTOR_SUBPARTS (atype)
+ < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
{
- vec_oprnd0 = arginfo[i].op;
- if ((j & (k - 1)) == 0)
+ unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
+ k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
+ / TYPE_VECTOR_SUBPARTS (atype));
+ gcc_assert ((k & (k - 1)) == 0);
+ if (m == 0)
vec_oprnd0
- = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
- vec_oprnd0);
+ = vect_get_vec_def_for_operand (op, stmt, NULL);
+ else
+ {
+ vec_oprnd0 = arginfo[i].op;
+ if ((m & (k - 1)) == 0)
+ vec_oprnd0
+ = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
+ vec_oprnd0);
+ }
+ arginfo[i].op = vec_oprnd0;
+ vec_oprnd0
+ = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
+ build_int_cst (integer_type_node, prec),
+ build_int_cst (integer_type_node,
+ (m & (k - 1)) * prec));
+ new_stmt
+ = gimple_build_assign_with_ops (BIT_FIELD_REF,
+ make_ssa_name (atype,
+ NULL),
+ vec_oprnd0, NULL_TREE);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ vargs.safe_push (gimple_assign_lhs (new_stmt));
}
- arginfo[i].op = vec_oprnd0;
- vec_oprnd0 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
- build_int_cst (integer_type_node, prec),
- build_int_cst (integer_type_node,
- (j & (k - 1)) * prec));
- new_stmt
- = gimple_build_assign_with_ops (BIT_FIELD_REF,
- make_ssa_name (atype,
- NULL),
- vec_oprnd0, NULL_TREE);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
- vargs.quick_push (gimple_assign_lhs (new_stmt));
- break;
- }
- k = nunits / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
- gcc_assert ((k & (k - 1)) == 0);
- vec<constructor_elt, va_gc> *ctor_elts;
- if (k != 1)
- vec_alloc (ctor_elts, k);
- else
- ctor_elts = NULL;
- for (l = 0; l < k; l++)
- {
- if (j == 0 && l == 0)
- vec_oprnd0
- = vect_get_vec_def_for_operand (op, stmt, NULL);
else
- vec_oprnd0
- = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
- arginfo[i].op);
- arginfo[i].op = vec_oprnd0;
- if (k == 1)
- break;
- CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, vec_oprnd0);
- }
- if (k == 1)
- {
- vargs.quick_push (vec_oprnd0);
- break;
+ {
+ k = (TYPE_VECTOR_SUBPARTS (atype)
+ / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
+ gcc_assert ((k & (k - 1)) == 0);
+ vec<constructor_elt, va_gc> *ctor_elts;
+ if (k != 1)
+ vec_alloc (ctor_elts, k);
+ else
+ ctor_elts = NULL;
+ for (l = 0; l < k; l++)
+ {
+ if (m == 0 && l == 0)
+ vec_oprnd0
+ = vect_get_vec_def_for_operand (op, stmt, NULL);
+ else
+ vec_oprnd0
+ = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
+ arginfo[i].op);
+ arginfo[i].op = vec_oprnd0;
+ if (k == 1)
+ break;
+ CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
+ vec_oprnd0);
+ }
+ if (k == 1)
+ vargs.safe_push (vec_oprnd0);
+ else
+ {
+ vec_oprnd0 = build_constructor (atype, ctor_elts);
+ new_stmt
+ = gimple_build_assign_with_ops
+ (CONSTRUCTOR, make_ssa_name (atype, NULL),
+ vec_oprnd0, NULL_TREE);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ vargs.safe_push (gimple_assign_lhs (new_stmt));
+ }
+ }
}
- vec_oprnd0 = build_constructor (atype, ctor_elts);
- new_stmt
- = gimple_build_assign_with_ops (CONSTRUCTOR,
- make_ssa_name (atype, NULL),
- vec_oprnd0, NULL_TREE);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
- vargs.quick_push (gimple_assign_lhs (new_stmt));
break;
case SIMD_CLONE_ARG_TYPE_UNIFORM:
- vargs.quick_push (op);
+ vargs.safe_push (op);
break;
case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
if (j == 0)
@@ -2498,7 +2510,7 @@ vectorizable_simd_clone_call (gimple stm
add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
UNKNOWN_LOCATION);
arginfo[i].op = phi_res;
- vargs.quick_push (phi_res);
+ vargs.safe_push (phi_res);
}
else
{
@@ -2516,7 +2528,7 @@ vectorizable_simd_clone_call (gimple stm
= gimple_build_assign_with_ops (code, new_temp,
arginfo[i].op, tcst);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
- vargs.quick_push (new_temp);
+ vargs.safe_push (new_temp);
}
break;
case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: