@@ -1,3 +1,45 @@
+2013-09-24 Aldy Hernandez <aldyh@redhat.com>
+
+ * Makefile.in (omp-low.o): Depend on PRETTY_PRINT_H.
+ * ipa-cp.c (determine_versionability): Nodes with SIMD clones are
+ not versionable.
+ * ggc.h (ggc_alloc_cleared_simd_clone_stat): New.
+ * cgraph.h (enum linear_stride_type): New.
+ (struct simd_clone_arg): New.
+ (struct simd_clone): New.
+ (struct cgraph_node): Add `simdclone' field.
+ Add `has_simd_clones' field.
+ * omp-low.c: Add new pass_omp_simd_clone support code.
+ (vecsize_mangle): New.
+ (ipa_omp_simd_clone): New.
+ (simd_clone_clauses_extract): New.
+ (simd_clone_compute_base_data_type): New.
+ (simd_clone_compute_isa_and_simdlen): New.
+ (simd_clone_create): New.
+ (simd_clone_mangle): New.
+ (simd_clone_struct_alloc): New.
+ (simd_clone_struct_copy): New.
+ (class argno_map): New.
+ (argno_map::argno_map(tree)): New.
+ (argno_map::~argno_map): New.
+ (argno_map::to_tree): New.
+ * tree.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): New.
+ * tree-core.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Document.
+ * tree-pass.h (make_pass_omp_simd_clone): New.
+ * passes.def (pass_omp_simd_clone): New.
+ * target.def: Define new hook prefix "TARGET_CILKPLUS_".
+ (default_vector_mangling_isa_code): New.
+ (max_vector_size_for_isa): New.
+ * doc/tm.texi.in: Add placeholder for
+ TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE,
+ TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA.
+ * doc/tm.texi: Regenerate.
+ * config/i386/i386.c (ix86_cilkplus_default_vector_mangling_isa_code):
+ New.
+ (ix86_cilkplus_max_vector_size_for_isa): New.
+ (TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE): Define.
+ (TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA): Define.
+
2013-09-19 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/58472
@@ -2573,6 +2573,7 @@ omp-low.o : omp-low.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
$(RTL_H) $(GIMPLE_H) $(TREE_INLINE_H) langhooks.h $(DIAGNOSTIC_CORE_H) \
$(TREE_SSA_H) $(FLAGS_H) $(EXPR_H) $(DIAGNOSTIC_CORE_H) \
$(TREE_PASS_H) $(GGC_H) $(EXCEPT_H) $(SPLAY_TREE_H) $(OPTABS_H) \
+ $(PRETTY_PRINT_H) \
$(CFGLOOP_H) tree-iterator.h $(TARGET_H) gt-omp-low.h
tree-browser.o : tree-browser.c tree-browser.def $(CONFIG_H) $(SYSTEM_H) \
coretypes.h $(HASH_TABLE_H) $(TREE_H) $(TREE_PRETTY_PRINT_H)
@@ -248,6 +248,68 @@ struct GTY(()) cgraph_clone_info
bitmap combined_args_to_skip;
};
+/* How the stride of a SIMD clone argument is specified, if the
+ argument is linear at all.  Stored in the `linear_stride' field of
+ `struct simd_clone_arg'.  */
+enum linear_stride_type {
+ /* The argument is not linear.  */
+ LINEAR_STRIDE_NO,
+ /* Linear, with a constant numeric stride.  */
+ LINEAR_STRIDE_YES_CONSTANT,
+ /* Linear, with the stride held in another function argument.  */
+ LINEAR_STRIDE_YES_VARIABLE
+};
+
+/* Function arguments in the original function of a SIMD clone.
+ Supplementary data for `struct simd_clone'.  One element describes
+ one argument.  */
+
+struct GTY(()) simd_clone_arg {
+ /* A SIMD clone's argument can be either linear (constant or
+ variable), uniform, or vector. If the argument is neither linear
+ nor uniform, the default is vector. */
+
+ /* If the linear stride is a constant, `linear_stride' is
+ LINEAR_STRIDE_YES_CONSTANT, and `linear_stride_num' holds
+ the numeric stride.
+
+ If the linear stride is variable, `linear_stride' is
+ LINEAR_STRIDE_YES_VARIABLE, and `linear_stride_num' contains
+ the function argument containing the stride (as an index into the
+ function arguments starting at 0).
+
+ Otherwise, `linear_stride' is LINEAR_STRIDE_NO and
+ `linear_stride_num' is unused. */
+ enum linear_stride_type linear_stride;
+ unsigned HOST_WIDE_INT linear_stride_num;
+
+ /* Variable alignment if available, otherwise 0. */
+ unsigned int alignment;
+
+ /* True if variable is uniform. */
+ unsigned int uniform : 1;
+};
+
+/* Specific data for a SIMD function clone.  The structure is
+ variable-sized: `args' is a trailing array, and
+ simd_clone_struct_alloc reserves room for NARGS further elements
+ after the one declared here.  */
+
+struct GTY(()) simd_clone {
+ /* Number of words in the SIMD lane associated with this clone.
+ When left at zero by the clauses it is filled in as the hardware
+ vector size divided by the size of the characteristic data type.  */
+ unsigned int simdlen;
+
+ /* Number of annotated function arguments in `args'. This is
+ usually the number of named arguments in FNDECL. */
+ unsigned int nargs;
+
+ /* Max hardware vector size in bits. */
+ unsigned int hw_vector_size;
+
+ /* Used to determine ISA in mangling.  Zero means no ISA character
+ has been chosen yet.  */
+ unsigned char isa;
+
+ /* True if this is the masked, in-branch version of the clone,
+ otherwise false. */
+ unsigned int inbranch : 1;
+
+ /* True if this is a Cilk Plus variant. */
+ unsigned int cilk_elemental : 1;
+
+ /* Annotated function arguments for the original function.  Indexed
+ by zero-based argument number; `nargs' gives the GC-visible
+ length.  */
+ struct simd_clone_arg GTY((length ("%h.nargs"))) args[1];
+};
+
/* The cgraph data structure.
Each function decl has assigned cgraph_node listing callees and callers. */
@@ -282,6 +344,10 @@ struct GTY(()) cgraph_node {
/* Declaration node used to be clone of. */
tree former_clone_of;
+ /* If this is a SIMD clone, this points to the SIMD specific
+ information for it. */
+ struct simd_clone *simdclone;
+
/* Interprocedural passes scheduled to have their transform functions
applied next time we execute local pass on them. We maintain it
per-function in order to allow IPA passes to introduce new functions. */
@@ -323,6 +389,8 @@ struct GTY(()) cgraph_node {
/* ?? We should be able to remove this. We have enough bits in
cgraph to calculate it. */
unsigned tm_clone : 1;
+ /* True if this function has SIMD clones. */
+ unsigned has_simd_clones : 1;
/* True if this decl is a dispatcher for function versions. */
unsigned dispatcher_function : 1;
};
@@ -42806,6 +42806,43 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
return val;
}
+/* Implement TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE: supply
+   the vector mangling ISA code to use when a `processor' clause does
+   not name one.  CLONE is not examined; the answer is always 'x'.  */
+
+static char
+ix86_cilkplus_default_vector_mangling_isa_code (struct cgraph_node *clone
+                                                ATTRIBUTE_UNUSED)
+{
+  const char default_isa = 'x';
+
+  return default_isa;
+}
+
+/* Implement TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA: map ISA, an ISA
+   character from the mangling section of Intel's Vector ABI, to the
+   widest hardware vector (in bits) that ISA provides.  Any character
+   other than 'x', 'y', 'Y' or 'z' is treated as unreachable.  */
+
+static unsigned int
+ix86_cilkplus_max_vector_size_for_isa (char isa)
+{
+  /* ?? Intel currently has no ISA encoding character for AVX-512.  */
+
+  /* xmm (SSE2).  */
+  if (isa == 'x')
+    return 128;
+
+  /* ymm1 (AVX1) and ymm2 (AVX2) share the same width.  */
+  if (isa == 'y' || isa == 'Y')
+    return 256;
+
+  /* zmm (MIC).  */
+  if (isa == 'z')
+    return 512;
+
+  gcc_unreachable ();
+  return 0;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
@@ -43178,6 +43215,14 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
+#undef TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE
+#define TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE \
+ ix86_cilkplus_default_vector_mangling_isa_code
+
+#undef TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA
+#define TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA \
+ ix86_cilkplus_max_vector_size_for_isa
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"
@@ -5787,6 +5787,26 @@ The default is @code{NULL_TREE} which means to not vectorize gather
loads.
@end deftypefn
+@deftypefn {Target Hook} char TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE (struct cgraph_node *@var{})
+This hook should return the default vector mangling ISA code when none
+is specified in a Cilk Plus @code{processor} clause. This is as specified
+in the Intel Vector ABI document.
+
+This hook, as well as @code{max_vector_size_for_isa} below must be set
+to support the Cilk Plus @code{processor} clause.
+
+The only argument is a @var{cgraph_node} containing the clone.
+@end deftypefn
+
+@deftypefn {Target Hook} {unsigned int} TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA (char)
+This hook returns the maximum hardware vector size in bits for a given
+@var{ISA} character. The @var{ISA} character is as described in Intel's
+Vector ABI (see section on mangling).
+
+This hook must be defined in order to support the Cilk Plus @code{processor}
+clause.
+@end deftypefn
+
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses
@@ -4414,6 +4414,10 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_VECTORIZE_BUILTIN_GATHER
+@hook TARGET_CILKPLUS_DEFAULT_VECTOR_MANGLING_ISA_CODE
+
+@hook TARGET_CILKPLUS_MAX_VECTOR_SIZE_FOR_ISA
+
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses
@@ -276,4 +276,11 @@ ggc_alloc_cleared_gimple_statement_d_stat (size_t s MEM_STAT_DECL)
ggc_internal_cleared_alloc_stat (s PASS_MEM_STAT);
}
+/* Typed wrapper around ggc_internal_cleared_alloc_stat: request S
+   bytes and hand them back as a `struct simd_clone'.  The memory
+   statistics arguments are forwarded unchanged.  */
+static inline struct simd_clone *
+ggc_alloc_cleared_simd_clone_stat (size_t s MEM_STAT_DECL)
+{
+  void *mem = ggc_internal_cleared_alloc_stat (s PASS_MEM_STAT);
+
+  return (struct simd_clone *) mem;
+}
+
#endif
@@ -446,6 +446,13 @@ determine_versionability (struct cgraph_node *node)
reason = "not a tree_versionable_function";
else if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
reason = "insufficient body availability";
+ else if (node->has_simd_clones)
+ {
+ /* Ideally we should clone the SIMD clones themselves and create
+ vector copies of them, so IPA-cp and SIMD clones can happily
+ coexist, but that may not be worth the effort. */
+ reason = "function has SIMD clones";
+ }
if (reason && dump_file && !node->symbol.alias && !node->thunk.thunk_p)
fprintf (dump_file, "Function %s/%i is not versionable, reason: %s.\n",
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see
#include "optabs.h"
#include "cfgloop.h"
#include "target.h"
+#include "pretty-print.h"
/* Lowering of OpenMP parallel and workshare constructs proceeds in two
@@ -10287,5 +10288,449 @@ make_pass_diagnose_omp_blocks (gcc::context *ctxt)
{
return new pass_diagnose_omp_blocks (ctxt);
}
+
+/* SIMD clone supporting code. */
+
+/* A map for function arguments. This will map a zero-based integer
+ to the corresponding index into DECL_ARGUMENTS.  The map is filled
+ once, at construction, and released by the destructor.  */
+class argno_map
+{
+ /* The argument decls, in DECL_ARGUMENTS chain order.  */
+ vec<tree> tree_args;
+ public:
+ /* Default constructor declared but not implemented by design. The
+ only valid constructor is TREE version below. */
+ argno_map ();
+ argno_map (tree fndecl);
+
+ /* Free the storage held by the vector.  */
+ ~argno_map () { tree_args.release (); }
+ /* Return the argument decl at zero-based position N.  */
+ tree to_tree (int n);
+};
+
+/* Build the map for FNDECL by recording every entry of its
+   DECL_ARGUMENTS chain, in chain order.  */
+
+argno_map::argno_map (tree fndecl)
+{
+  tree_args.create (5);
+
+  tree arg = DECL_ARGUMENTS (fndecl);
+  while (arg)
+    {
+      tree_args.safe_push (arg);
+      arg = DECL_CHAIN (arg);
+    }
+}
+
+/* Look up the argument DECL stored at zero-based position N of the
+   function's argument list.  */
+
+tree
+argno_map::to_tree (int n)
+{
+  tree arg_decl = tree_args[n];
+
+  return arg_decl;
+}
+
+/* Allocate a fresh `simd_clone' through the cleared GC allocator and
+   return it.  NARGS is the number of arguments to reserve space for;
+   the caller is responsible for storing it in the `nargs' field.  */
+
+static struct simd_clone *
+simd_clone_struct_alloc (int nargs)
+{
+  /* `struct simd_clone' already embeds one `simd_clone_arg'; NARGS more
+     are reserved behind it.  Sizes are kept in size_t rather than
+     being narrowed to int.  */
+  size_t len = (sizeof (struct simd_clone)
+                + nargs * sizeof (struct simd_clone_arg));
+
+  /* This function has no MEM_STAT_DECL parameters to forward, so
+     PASS_MEM_STAT would name undeclared variables when memory
+     statistics are enabled.  MEM_STAT_INFO records this call site as
+     the origin of the allocation instead.  */
+  return ggc_alloc_cleared_simd_clone_stat (len MEM_STAT_INFO);
+}
+
+/* Copy the `struct simd_clone' FROM, together with the FROM->nargs
+   trailing argument descriptors that follow it, into TO.  TO must be
+   at least as large as FROM.  */
+
+static inline void
+simd_clone_struct_copy (struct simd_clone *to, struct simd_clone *from)
+{
+  size_t nbytes = sizeof (struct simd_clone);
+
+  nbytes += from->nargs * sizeof (struct simd_clone_arg);
+  memcpy (to, from, nbytes);
+}
+
+/* Given a simd clone in NEW_NODE, extract the simd specific
+   information from the OMP clauses passed in CLAUSES, and set the
+   relevant bits in the cgraph node.
+
+   A freshly allocated `struct simd_clone' sized for the arguments of
+   NEW_NODE's decl is always attached to NEW_NODE->simdclone, which
+   must be NULL on entry.  If CLAUSES is NULL or is not an OMP_CLAUSE
+   chain, the structure is left with only `nargs' and
+   `cilk_elemental' filled in.
+
+   In LINEAR, UNIFORM and ALIGNED clauses OMP_CLAUSE_DECL is read as
+   an integer constant: the zero-based index of the argument.
+
+   *INBRANCH_SPECIFIED is set to TRUE if the `inbranch' or
+   `notinbranch' clause specified, otherwise set to FALSE.  */
+
+static void
+simd_clone_clauses_extract (struct cgraph_node *new_node, tree clauses,
+                            bool *inbranch_specified)
+{
+  tree fndecl = new_node->symbol.decl;
+  tree t;
+  int n = 0;
+
+  *inbranch_specified = false;
+  for (t = DECL_ARGUMENTS (fndecl); t; t = DECL_CHAIN (t))
+    ++n;
+
+  /* To distinguish from an OpenMP simd clone, Cilk Plus functions to
+     be cloned have a distinctive artificial label in addition to "omp
+     declare simd".  */
+  bool cilk_clone
+    = (flag_enable_cilkplus
+       && lookup_attribute ("cilk plus elemental",
+                            DECL_ATTRIBUTES (fndecl)));
+
+  /* remove_attribute returns the resulting list, so store it back the
+     same way simd_clone_create does.  Otherwise the attribute would
+     survive whenever it is the first element of the list.  */
+  if (cilk_clone)
+    DECL_ATTRIBUTES (fndecl)
+      = remove_attribute ("cilk plus elemental", DECL_ATTRIBUTES (fndecl));
+
+  struct simd_clone *clone_info = simd_clone_struct_alloc (n);
+  clone_info->nargs = n;
+  clone_info->cilk_elemental = cilk_clone;
+  gcc_assert (!new_node->simdclone);
+  new_node->simdclone = clone_info;
+
+  if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
+    return;
+
+  for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
+    {
+      switch (OMP_CLAUSE_CODE (t))
+        {
+        case OMP_CLAUSE_INBRANCH:
+          clone_info->inbranch = 1;
+          *inbranch_specified = true;
+          break;
+        case OMP_CLAUSE_NOTINBRANCH:
+          clone_info->inbranch = 0;
+          *inbranch_specified = true;
+          break;
+        case OMP_CLAUSE_SIMDLEN:
+          clone_info->simdlen
+            = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t));
+          break;
+        case OMP_CLAUSE_LINEAR:
+          {
+            tree decl = OMP_CLAUSE_DECL (t);
+            tree step = OMP_CLAUSE_LINEAR_STEP (t);
+            int argno = TREE_INT_CST_LOW (decl);
+            if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t))
+              {
+                /* STEP is the index of the argument holding the
+                   stride.  */
+                clone_info->args[argno].linear_stride
+                  = LINEAR_STRIDE_YES_VARIABLE;
+                clone_info->args[argno].linear_stride_num
+                  = TREE_INT_CST_LOW (step);
+                gcc_assert (!TREE_INT_CST_HIGH (step));
+              }
+            else
+              {
+                if (TREE_INT_CST_HIGH (step))
+                  {
+                    /* It looks like this can't really happen, since the
+                       front-ends generally issue:
+
+                       warning: integer constant is too large for its type.
+
+                       But let's assume somehow we got past all that.
+
+                       DECL is an integer constant here (it was just
+                       read with TREE_INT_CST_LOW), so it carries no
+                       source location; report at the clause.  */
+                    warning_at (OMP_CLAUSE_LOCATION (t), 0,
+                                "ignoring large linear step");
+                  }
+                else
+                  {
+                    clone_info->args[argno].linear_stride
+                      = LINEAR_STRIDE_YES_CONSTANT;
+                    clone_info->args[argno].linear_stride_num
+                      = TREE_INT_CST_LOW (step);
+                  }
+              }
+            break;
+          }
+        case OMP_CLAUSE_UNIFORM:
+          {
+            tree decl = OMP_CLAUSE_DECL (t);
+            int argno = tree_low_cst (decl, 1);
+            clone_info->args[argno].uniform = 1;
+            break;
+          }
+        case OMP_CLAUSE_ALIGNED:
+          {
+            tree decl = OMP_CLAUSE_DECL (t);
+            int argno = tree_low_cst (decl, 1);
+            clone_info->args[argno].alignment
+              = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t));
+            break;
+          }
+        default:
+          /* Other clauses carry nothing that is recorded here.  */
+          break;
+        }
+    }
+}
+
+/* Helper function for mangling vectors.  Translate VECSIZE, a vector
+   size in bits, into the character used for it in mangled names.
+   Sizes without a dedicated character are given 'x'.  */
+
+static char
+vecsize_mangle (unsigned int vecsize)
+{
+  /* The Intel Vector ABI does not provide a mangling character
+     for a 64-bit ISA, but this feels like it's keeping with the
+     design.  */
+  if (vecsize == 64)
+    return 'w';
+
+  if (vecsize == 128)
+    return 'x';
+  if (vecsize == 256)
+    return 'y';
+  if (vecsize == 512)
+    return 'z';
+
+  /* FIXME: We must come up with a default mangling bit.  */
+  return 'x';
+}
+
+/* Given a SIMD clone in NEW_NODE, calculate the characteristic data
+   type and return the corresponding type.  The characteristic data
+   type is computed as described in the Intel Vector ABI; the lettered
+   steps below follow that description.  NEW_NODE->simdclone must
+   already describe the arguments.  */
+
+static tree
+simd_clone_compute_base_data_type (struct cgraph_node *new_node)
+{
+  tree fndecl = new_node->symbol.decl;
+  tree ret_type = TREE_TYPE (TREE_TYPE (fndecl));
+
+  /* d) If none of the classes below is applicable, the characteristic
+     data type is int.  */
+  tree type = integer_type_node;
+
+  if (TREE_CODE (ret_type) != VOID_TYPE)
+    {
+      /* a) For non-void function, the characteristic data type is the
+         return type.  */
+      type = ret_type;
+    }
+  else
+    {
+      /* b) If the function has any non-uniform, non-linear parameters,
+         then the characteristic data type is the type of the first
+         such parameter.  */
+      argno_map map (fndecl);
+      for (unsigned int i = 0; i < new_node->simdclone->nargs; ++i)
+        {
+          const struct simd_clone_arg *arg = &new_node->simdclone->args[i];
+          if (arg->uniform || arg->linear_stride != LINEAR_STRIDE_NO)
+            continue;
+          type = TREE_TYPE (map.to_tree (i));
+          break;
+        }
+    }
+
+  /* c) If the characteristic data type determined by a) or b) above
+     is struct, union, or class type which is pass-by-value (except
+     for the type that maps to the built-in complex data type), the
+     characteristic data type is int.  */
+  bool by_value_aggregate = (RECORD_OR_UNION_TYPE_P (type)
+                             && !aggregate_value_p (type, NULL)
+                             && TREE_CODE (type) != COMPLEX_TYPE);
+
+  /* e) For Intel Xeon Phi native and offload compilation, if the
+     resulting characteristic data type is 8-bit or 16-bit integer
+     data type, the characteristic data type is int.  */
+  /* Well, we don't handle Xeon Phi yet.  */
+
+  return by_value_aggregate ? integer_type_node : type;
+}
+
+/* Given a SIMD clone in NEW_NODE, compute the default ISA, simdlen,
+   and hardware vector size and store them in NEW_NODE->simdclone.
+
+   An ISA character or simdlen that is already non-zero in
+   NEW_NODE->simdclone is kept in the Cilk Plus path; the generic path
+   always derives the ISA character from the preferred vector size.  */
+
+static void
+simd_clone_compute_isa_and_simdlen (struct cgraph_node *new_node)
+{
+  struct simd_clone *info = new_node->simdclone;
+  char isa = info->isa;
+  /* Vector size for this clone.  */
+  unsigned int vecsize = 0;
+  /* Base vector type, based on function arguments.  */
+  tree base_type = simd_clone_compute_base_data_type (new_node);
+  unsigned int base_type_size = GET_MODE_BITSIZE (TYPE_MODE (base_type));
+
+  /* Calculate everything for Cilk Plus clones with appropriate target
+     support.  This is as specified in the Intel Vector ABI.
+
+     Note: Any target which supports the Cilk Plus processor clause
+     must also provide appropriate target hooks for calculating
+     default ISA/processor (default_vector_mangling_isa_code), and for
+     calculating hardware vector size based on ISA/processor
+     (max_vector_size_for_isa).
+
+     Both hooks default to NULL, so both are tested before either is
+     called; a target that supplies only one of them gets the generic
+     computation rather than a call through a null pointer.  */
+  if (info->cilk_elemental
+      && targetm.cilkplus.default_vector_mangling_isa_code
+      && targetm.cilkplus.max_vector_size_for_isa)
+    {
+      if (!isa)
+        isa = targetm.cilkplus.default_vector_mangling_isa_code (new_node);
+      vecsize = targetm.cilkplus.max_vector_size_for_isa (isa);
+    }
+  /* Calculate everything else generically.  */
+  else
+    {
+      vecsize = GET_MODE_BITSIZE (targetm.vectorize.preferred_simd_mode
+                                  (TYPE_MODE (base_type)));
+      isa = vecsize_mangle (vecsize);
+    }
+
+  /* An explicit simdlen clause wins; otherwise use as many elements of
+     the characteristic type as fit in one hardware vector.
+     NOTE(review): BASE_TYPE_SIZE is not checked for zero before this
+     division -- confirm it cannot be zero for the types that reach
+     here.  */
+  if (!info->simdlen)
+    info->simdlen = vecsize / base_type_size;
+
+  info->isa = isa;
+  info->hw_vector_size = vecsize;
+}
+
+/* Give the SIMD clone NEW_NODE its mangled assembler name.  The name
+ has the form "_ZGV" <isa> <mask> <simdlen> <args> "_" <name>, where
+ <name> is the assembler name of OLD_NODE's decl and <args> holds one
+ code per entry of NEW_NODE->simdclone->args.  The ISA and simdlen of
+ NEW_NODE->simdclone must already be set (both are asserted
+ non-zero).  */
+
+static void
+simd_clone_mangle (struct cgraph_node *old_node, struct cgraph_node *new_node)
+{
+ char isa = new_node->simdclone->isa;
+ /* 'M' marks the masked (inbranch) variant, 'N' the unmasked one.  */
+ char mask = new_node->simdclone->inbranch ? 'M' : 'N';
+ unsigned int simdlen = new_node->simdclone->simdlen;
+ unsigned int n;
+ /* Accumulates the per-argument codes.  */
+ pretty_printer vars_pp;
+
+ gcc_assert (isa && simdlen);
+
+ for (n = 0; n < new_node->simdclone->nargs; ++n)
+ {
+ struct simd_clone_arg arg = new_node->simdclone->args[n];
+
+ /* Uniform takes precedence over any linear marking.  */
+ if (arg.uniform)
+ pp_character (&vars_pp, 'u');
+ else if (arg.linear_stride == LINEAR_STRIDE_YES_CONSTANT)
+ {
+ /* 'l' followed by the stride; a stride of 1 is left implicit.  */
+ gcc_assert (arg.linear_stride_num != 0);
+ pp_character (&vars_pp, 'l');
+ if (arg.linear_stride_num > 1)
+ pp_unsigned_wide_integer (&vars_pp,
+ arg.linear_stride_num);
+ }
+ else if (arg.linear_stride == LINEAR_STRIDE_YES_VARIABLE)
+ {
+ /* 's' followed by the index of the argument holding the stride.  */
+ pp_character (&vars_pp, 's');
+ pp_unsigned_wide_integer (&vars_pp, arg.linear_stride_num);
+ }
+ else
+ pp_character (&vars_pp, 'v');
+ /* A non-zero alignment is appended to whichever code was chosen.  */
+ if (arg.alignment)
+ {
+ pp_character (&vars_pp, 'a');
+ pp_decimal_int (&vars_pp, arg.alignment);
+ }
+ }
+
+ /* Assemble the full name and install it on the clone's decl.  */
+ pretty_printer pp;
+ pp_printf (&pp, "_ZGV%c%c%d%s_%s", isa, mask, simdlen,
+ pp_formatted_text (&vars_pp),
+ IDENTIFIER_POINTER
+ (DECL_ASSEMBLER_NAME (old_node->symbol.decl)));
+ const char *str = pp_formatted_text (&pp);
+ change_decl_assembler_name (new_node->symbol.decl,
+ get_identifier (str));
+}
+
+/* Create a simd clone of OLD_NODE and return it.  The clone is made
+   with cgraph_function_versioning, inherits OLD_NODE's visibility,
+   and has the "omp declare simd" attribute removed from its decl.
+   OLD_NODE is flagged as having SIMD clones.  */
+
+static struct cgraph_node *
+simd_clone_create (struct cgraph_node *old_node)
+{
+  struct cgraph_node *new_node
+    = cgraph_function_versioning (old_node, vNULL, NULL, NULL, false,
+                                  NULL, NULL, "simdclone");
+  tree old_decl = old_node->symbol.decl;
+  tree new_decl = new_node->symbol.decl;
+
+  /* Keep cgraph friends from removing the clone.  */
+  new_node->symbol.externally_visible = old_node->symbol.externally_visible;
+  TREE_PUBLIC (new_decl) = TREE_PUBLIC (old_decl);
+
+  old_node->has_simd_clones = true;
+
+  tree attrs = DECL_ATTRIBUTES (new_decl);
+  DECL_ATTRIBUTES (new_decl) = remove_attribute ("omp declare simd", attrs);
+
+  return new_node;
+}
+
+/* If the function in NODE is tagged as an elemental SIMD function,
+   create the appropriate SIMD clones: one per "omp declare simd"
+   attribute on its decl, plus a masked twin for every attribute that
+   has neither an `inbranch' nor a `notinbranch' clause.  Nothing is
+   done when the body availability is below AVAIL_OVERWRITABLE.  */
+
+static void
+expand_simd_clones (struct cgraph_node *node)
+{
+  if (cgraph_function_body_availability (node) < AVAIL_OVERWRITABLE)
+    return;
+
+  for (tree attr = lookup_attribute ("omp declare simd",
+                                     DECL_ATTRIBUTES (node->symbol.decl));
+       attr;
+       attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr)))
+    {
+      struct cgraph_node *base_clone = simd_clone_create (node);
+      bool have_inbranch_clause;
+
+      simd_clone_clauses_extract (base_clone, TREE_VALUE (attr),
+                                  &have_inbranch_clause);
+      simd_clone_compute_isa_and_simdlen (base_clone);
+      simd_clone_mangle (node, base_clone);
+
+      // FIXME: Adjust clone parameters to their appropriate vector types.
+
+      if (have_inbranch_clause)
+        continue;
+
+      /* If no inbranch clause was specified, we need both variants.
+         We have already created the not-in-branch version above, by
+         virtue of .inbranch being clear.  Create the masked in-branch
+         version from a copy of the same clone data.  */
+      struct cgraph_node *masked_clone = simd_clone_create (node);
+      struct simd_clone *masked_info
+        = simd_clone_struct_alloc (base_clone->simdclone->nargs);
+
+      simd_clone_struct_copy (masked_info, base_clone->simdclone);
+      masked_info->inbranch = 1;
+      masked_clone->simdclone = masked_info;
+      simd_clone_mangle (node, masked_clone);
+    }
+}
+
+/* Entry point for IPA simd clone creation pass.  Runs
+   expand_simd_clones on every defined function and always returns 0
+   (no TODO flags).  */
+
+static unsigned int
+ipa_omp_simd_clone (void)
+{
+  struct cgraph_node *fn;
+
+  FOR_EACH_DEFINED_FUNCTION (fn)
+    {
+      expand_simd_clones (fn);
+    }
+
+  return 0;
+}
+
+namespace {
+
+/* Static description of the SIMD clone creation pass.  */
+const pass_data pass_data_omp_simd_clone =
+{
+ SIMPLE_IPA_PASS, /* type */
+ "simdclone", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ true, /* has_gate */
+ true, /* has_execute */
+ TV_NONE, /* tv_id */
+ ( PROP_ssa | PROP_cfg ), /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+/* Simple IPA pass that creates SIMD clones by running
+ ipa_omp_simd_clone.  */
+class pass_omp_simd_clone : public simple_ipa_opt_pass
+{
+public:
+ pass_omp_simd_clone(gcc::context *ctxt)
+ : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ /* The pass runs when either flag_openmp or flag_enable_cilkplus is
+ set.  */
+ bool gate () { return flag_openmp || flag_enable_cilkplus; }
+ unsigned int execute () { return ipa_omp_simd_clone (); }
+};
+
+} // anon namespace
+
+/* Create a new instance of the SIMD clone creation pass for CTXT.  */
+
+simple_ipa_opt_pass *
+make_pass_omp_simd_clone (gcc::context *ctxt)
+{
+  simple_ipa_opt_pass *pass = new pass_omp_simd_clone (ctxt);
+
+  return pass;
+}
#include "gt-omp-low.h"
@@ -97,6 +97,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_feedback_split_functions);
POP_INSERT_PASSES ()
NEXT_PASS (pass_ipa_increase_alignment);
+ NEXT_PASS (pass_omp_simd_clone);
NEXT_PASS (pass_ipa_tm);
NEXT_PASS (pass_ipa_lower_emutls);
TERMINATE_PASS_LIST ()
@@ -1508,6 +1508,35 @@ hook_int_uint_mode_1)
HOOK_VECTOR_END (sched)
+/* Functions relating to Cilk Plus. */
+#undef HOOK_PREFIX
+#define HOOK_PREFIX "TARGET_CILKPLUS_"
+HOOK_VECTOR (TARGET_CILKPLUS, cilkplus)
+
+DEFHOOK
+(default_vector_mangling_isa_code,
+"This hook should return the default vector mangling ISA code when none\n\
+is specified in a Cilk Plus @code{processor} clause. This is as specified\n\
+in the Intel Vector ABI document.\n\
+\n\
+This hook, as well as @code{max_vector_size_for_isa} below must be set\n\
+to support the Cilk Plus @code{processor} clause.\n\
+\n\
+The only argument is a @var{cgraph_node} containing the clone.",
+char, (struct cgraph_node *), NULL)
+
+DEFHOOK
+(max_vector_size_for_isa,
+"This hook returns the maximum hardware vector size in bits for a given\n\
+@var{ISA} character. The @var{ISA} character is as described in Intel's\n\
+Vector ABI (see section on mangling).\n\
+\n\
+This hook must be defined in order to support the Cilk Plus @code{processor}\n\
+clause.",
+unsigned int, (char), NULL)
+
+HOOK_VECTOR_END (cilkplus)
+
/* Functions relating to vectorization. */
#undef HOOK_PREFIX
#define HOOK_PREFIX "TARGET_VECTORIZE_"
new file mode 100644
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-optimized -O3" } */
+
+/* Test that functions that have SIMD clone counterparts are not
+ cloned by IPA-cp. For example, special_add() below has SIMD clones
+ created for it. However, if IPA-cp later decides to clone a
+ specialization of special_add(x, 666) when analyzing fillit(), we
+ will forever keep the vectorizer from using the SIMD versions of
+ special_add in a loop.
+
+ If IPA-CP gets taught how to adjust the SIMD clones as well, this
+ test could be removed. */
+
+/* Function with SIMD clones (declare simd, simdlen 4).  The Y == 666
+ branch is what makes a constant-propagated specialization of it
+ attractive.  */
+#pragma omp declare simd simdlen(4)
+static int __attribute__ ((noinline))
+special_add (int x, int y)
+{
+ if (y == 666)
+ return x + y + 123;
+ else
+ return x + y;
+}
+
+/* Call special_add in a loop with the constant 666 as its second
+ argument on every iteration.  TOT must have room for 10000 ints.  */
+void fillit(int *tot)
+{
+ int i;
+
+ for (i=0; i < 10000; ++i)
+ tot[i] = special_add (i, 666);
+}
+
+/* { dg-final { scan-tree-dump-not "special_add.constprop" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-fopenmp -fdump-tree-optimized -O -msse2" } */
+
+/* Two declare simd directives, each with an explicit *inbranch
+ clause.  The scans at the end of the file expect the mangled names
+ _ZGVxM4vl66u_addit and _ZGVxN4vvva32_addit for the resulting
+ clones.  */
+#pragma omp declare simd inbranch uniform(c) linear(b:66) // addit.simdclone.2
+#pragma omp declare simd notinbranch aligned(c:32) // addit.simdclone.1
+int addit(int a, int b, int c)
+{
+ return a + b;
+}
+
+/* Uniform, aligned pointer A; vector X; K linear with step 1.  The
+ scan at the end of the file expects the mangled name
+ _ZGVxN4ua32vl_setArray.  */
+#pragma omp declare simd uniform(a) aligned(a:32) linear(k:1) notinbranch
+float setArray(float *a, float x, int k)
+{
+ a[k] = a[k] + x;
+ return a[k];
+}
+
+/* { dg-final { scan-tree-dump "clone.0 \\(_ZGVxN4ua32vl_setArray" "optimized" } } */
+/* { dg-final { scan-tree-dump "clone.1 \\(_ZGVxN4vvva32_addit" "optimized" } } */
+/* { dg-final { scan-tree-dump "clone.2 \\(_ZGVxM4vl66u_addit" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-fopenmp -fdump-tree-optimized -O -msse2" } */
+
+/* Test that if there are no *inbranch clauses, both the masked and
+ the unmasked versions are created. */
+
+/* declare simd with no clauses at all.  The scans at the end of the
+ file expect both _ZGVxN4vvv_addit and _ZGVxM4vvv_addit.  */
+#pragma omp declare simd
+int addit(int a, int b, int c)
+{
+ return a + b;
+}
+
+/* { dg-final { scan-tree-dump "clone.* \\(_ZGVxN4vvv_addit" "optimized" } } */
+/* { dg-final { scan-tree-dump "clone.* \\(_ZGVxM4vvv_addit" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
@@ -885,6 +885,9 @@ struct GTY(()) tree_base {
CALL_ALLOCA_FOR_VAR_P in
CALL_EXPR
+ OMP_CLAUSE_LINEAR_VARIABLE_STRIDE in
+ OMP_CLAUSE_LINEAR
+
side_effects_flag:
TREE_SIDE_EFFECTS in
@@ -474,6 +474,7 @@ extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_lto_finish_out (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
+extern simple_ipa_opt_pass *make_pass_omp_simd_clone (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_profile (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_cdtor_merge (gcc::context *ctxt);
@@ -1318,6 +1318,10 @@ extern void protected_set_expr_location (tree, location_t);
#define OMP_CLAUSE_LINEAR_NO_COPYOUT(NODE) \
TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR))
+/* True if a LINEAR clause has a stride that is variable. */
+#define OMP_CLAUSE_LINEAR_VARIABLE_STRIDE(NODE) \
+ TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR))
+
#define OMP_CLAUSE_LINEAR_STEP(NODE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR), 1)