===================================================================
@@ -9913,6 +9913,14 @@ changed via the optimize attribute or pragma, see
@code{TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE}
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_OPTION_FUNCTION_VERSIONS (tree @var{decl1}, tree @var{decl2})
+This target hook returns @code{true} if @var{decl1} and @var{decl2} are
+versions of the same function. @var{decl1} and @var{decl2} are function
+versions if and only if they have the same function signature and
+different target specific attributes, that is, they are compiled for
+different target machines.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_CAN_INLINE_P (tree @var{caller}, tree @var{callee})
This target hook returns @code{false} if the @var{caller} function
cannot inline @var{callee}, based on target specific information. By
@@ -10930,6 +10938,29 @@ The result is another tree containing a simplified
call's result. If @var{ignore} is true the value will be ignored.
@end deftypefn
+@deftypefn {Target Hook} int TARGET_COMPARE_VERSION_PRIORITY (tree @var{decl1}, tree @var{decl2})
+This hook is used to compare the target attributes in two functions to
+determine which function's features get higher priority. This is used
+during function multi-versioning to figure out the order in which two
+versions must be dispatched. A function version with a higher priority
+is checked for dispatching earlier. @var{decl1} and @var{decl2} are
+the two function decls that will be compared.
+@end deftypefn
+
+@deftypefn {Target Hook} tree TARGET_GET_FUNCTION_VERSIONS_DISPATCHER (void *@var{arglist})
+This hook is used to get the dispatcher function for a set of function
+versions. The dispatcher function is called to invoke the right function
+version at run-time. @var{arglist} is the vector of function versions
+that should be considered for dispatch.
+@end deftypefn
+
+@deftypefn {Target Hook} tree TARGET_GENERATE_VERSION_DISPATCHER_BODY (void *@var{arg})
+This hook is used to generate the dispatcher logic to invoke the right
+function version at run-time for a given set of function versions.
+@var{arg} points to the callgraph node of the dispatcher function whose
+body must be generated.
+@end deftypefn
+
@deftypefn {Target Hook} {const char *} TARGET_INVALID_WITHIN_DOLOOP (const_rtx @var{insn})
Take an instruction in @var{insn} and return NULL if it is valid within a
===================================================================
@@ -9782,6 +9782,14 @@ changed via the optimize attribute or pragma, see
@code{TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE}
@end deftypefn
+@hook TARGET_OPTION_FUNCTION_VERSIONS
+This target hook returns @code{true} if @var{decl1} and @var{decl2} are
+versions of the same function. @var{decl1} and @var{decl2} are function
+versions if and only if they have the same function signature and
+different target specific attributes, that is, they are compiled for
+different target machines.
+@end deftypefn
+
@hook TARGET_CAN_INLINE_P
This target hook returns @code{false} if the @var{caller} function
cannot inline @var{callee}, based on target specific information. By
@@ -10788,6 +10796,29 @@ The result is another tree containing a simplified
call's result. If @var{ignore} is true the value will be ignored.
@end deftypefn
+@hook TARGET_COMPARE_VERSION_PRIORITY
+This hook is used to compare the target attributes in two functions to
+determine which function's features get higher priority. This is used
+during function multi-versioning to figure out the order in which two
+versions must be dispatched. A function version with a higher priority
+is checked for dispatching earlier. @var{decl1} and @var{decl2} are
+the two function decls that will be compared.
+@end deftypefn
+
+@hook TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
+This hook is used to get the dispatcher function for a set of function
+versions. The dispatcher function is called to invoke the right function
+version at run-time. @var{arglist} is the vector of function versions
+that should be considered for dispatch.
+@end deftypefn
+
+@hook TARGET_GENERATE_VERSION_DISPATCHER_BODY
+This hook is used to generate the dispatcher logic to invoke the right
+function version at run-time for a given set of function versions.
+@var{arg} points to the callgraph node of the dispatcher function whose
+body must be generated.
+@end deftypefn
+
@hook TARGET_INVALID_WITHIN_DOLOOP
Take an instruction in @var{insn} and return NULL if it is valid within a
===================================================================
@@ -633,6 +633,34 @@ cgraph_analyze_function (struct cgraph_node *node)
{
push_cfun (DECL_STRUCT_FUNCTION (decl));
+ /* If this decl is one version of a set of multi-versioned functions,
+ check if its dispatcher body needs to be generated. */
+ if (DECL_FUNCTION_VERSIONED (decl)
+ && get_cgraph_node_version (node) != NULL)
+ {
+ struct cgraph_function_version_info *node_version_info
+ = get_cgraph_node_version (node);
+ if (node_version_info->dispatcher_resolver)
+ {
+ tree dispatcher_decl = node_version_info->dispatcher_resolver;
+ struct cgraph_node *dispatcher_node
+ = cgraph_get_create_node (dispatcher_decl);
+ struct cgraph_function_version_info *dispatcher_version_info
+ = get_cgraph_node_version (dispatcher_node);
+ if (dispatcher_node->local.finalized
+ && dispatcher_version_info != NULL
+ && (dispatcher_version_info->dispatcher_resolver
+ == NULL_TREE))
+ {
+ tree resolver = NULL_TREE;
+ gcc_assert (targetm.generate_version_dispatcher_body);
+ resolver
+ = targetm.generate_version_dispatcher_body (dispatcher_node);
+ gcc_assert (resolver != NULL_TREE);
+ }
+ }
+ }
+
assign_assembler_name_if_neeeded (node->symbol.decl);
/* Make sure to gimplify bodies only once. During analyzing a
===================================================================
@@ -1298,6 +1298,37 @@ DEFHOOK
tree, (tree fndecl, int n_args, tree *argp, bool ignore),
hook_tree_tree_int_treep_bool_null)
+/* Target hook is used to compare the target attributes in two functions to
+ determine which function's features get higher priority. This is used
+ during function multi-versioning to figure out the order in which two
+ versions must be dispatched. A function version with a higher priority
+ is checked for dispatching earlier. DECL1 and DECL2 are
+ the two function decls that will be compared. It returns positive value
+ if DECL1 is higher priority, negative value if DECL2 is higher priority
+ and 0 if they are the same. */
+DEFHOOK
+(compare_version_priority,
+ "",
+ int, (tree decl1, tree decl2), NULL)
+
+/* Target hook is used to generate the dispatcher logic to invoke the right
+ function version at run-time for a given set of function versions.
+ ARG points to the callgraph node of the dispatcher function whose body
+ must be generated. */
+DEFHOOK
+(generate_version_dispatcher_body,
+ "",
+ tree, (void *arg), NULL)
+
+/* Target hook is used to get the dispatcher function for a set of function
+ versions. The dispatcher function is called to invoke the right function
+ version at run-time. ARGLIST is the vector of function versions that
+ should be considered for dispatch. */
+DEFHOOK
+(get_function_versions_dispatcher,
+ "",
+ tree, (void *arglist), NULL)
+
/* Returns a code for a target-specific builtin that implements
reciprocal of the function, or NULL_TREE if not available. */
DEFHOOK
@@ -2725,6 +2756,16 @@ DEFHOOK
void, (void),
hook_void_void)
+/* This function returns true if DECL1 and DECL2 are versions of the same
+ function. DECL1 and DECL2 are function versions if and only if they
+ have the same function signature and different target specific attributes,
+ that is, they are compiled for different target machines. */
+DEFHOOK
+(function_versions,
+ "",
+ bool, (tree decl1, tree decl2),
+ hook_bool_tree_tree_false)
+
/* Function to determine if one function can inline another function. */
#undef HOOK_PREFIX
#define HOOK_PREFIX "TARGET_"
===================================================================
@@ -132,6 +132,74 @@ static GTY(()) struct cgraph_edge *free_edges;
/* Did procss_same_body_aliases run? */
bool same_body_aliases_done;
+/* Map a cgraph_node to cgraph_function_version_info using this htab.
+ The cgraph_function_version_info has a THIS_NODE field that is the
+ corresponding cgraph_node. */
+htab_t GTY((param_is (struct cgraph_function_version_info *)))
+ cgraph_fnver_htab = NULL;
+
+/* Hash callback for cgraph_fnver_htab: an entry hashes to its node's uid.  */
+static hashval_t
+cgraph_fnver_htab_hash (const void *ptr)
+{
+  return (hashval_t) (int)
+    ((const struct cgraph_function_version_info *) ptr)->this_node->uid;
+}
+
+/* Equality callback for cgraph_fnver_htab: compare the nodes' uids.  */
+static int
+cgraph_fnver_htab_eq (const void *p1, const void *p2)
+{
+  const struct cgraph_node *lhs
+    = ((const struct cgraph_function_version_info *) p1)->this_node;
+  const struct cgraph_node *rhs
+    = ((const struct cgraph_function_version_info *) p2)->this_node;
+
+  return lhs->uid == rhs->uid;
+}
+
+/* Mark as GC root all allocated nodes. */
+static GTY(()) struct cgraph_function_version_info *
+ version_info_node = NULL;
+
+/* Allocate a new cgraph_function_version_info for cgraph_node NODE, store
+ it in cgraph_fnver_htab (created here on first use) and return it. */
+struct cgraph_function_version_info *
+insert_new_cgraph_node_version (struct cgraph_node *node)
+{
+ void **slot;
+
+ version_info_node = NULL; /* Overwritten by the allocation just below. */
+ version_info_node = ggc_alloc_cleared_cgraph_function_version_info ();
+ version_info_node->this_node = node;
+
+ if (cgraph_fnver_htab == NULL)
+ cgraph_fnver_htab = htab_create_ggc (2, cgraph_fnver_htab_hash,
+ cgraph_fnver_htab_eq, NULL);
+ /* NOTE(review): *slot is assigned without checking for an earlier entry. */
+ slot = htab_find_slot (cgraph_fnver_htab, version_info_node, INSERT);
+ gcc_assert (slot != NULL);
+ *slot = version_info_node;
+ return version_info_node;
+}
+
+/* Look up the version info recorded for NODE in cgraph_fnver_htab.  Returns
+   NULL if the table does not exist yet, else the result of htab_find.  */
+struct cgraph_function_version_info *
+get_cgraph_node_version (struct cgraph_node *node)
+{
+  /* Lookup key: the table's hash and equality callbacks read only
+     this_node->uid, so no other field needs to be set.  */
+  struct cgraph_function_version_info probe;
+
+  if (cgraph_fnver_htab == NULL)
+    return NULL;
+
+  probe.this_node = node;
+  return (struct cgraph_function_version_info *)
+    htab_find (cgraph_fnver_htab, &probe);
+}
+
/* Macros to access the next item in the list of free cgraph nodes and
edges. */
#define NEXT_FREE_NODE(NODE) cgraph ((NODE)->symbol.next)
===================================================================
@@ -200,7 +200,38 @@ struct GTY(()) cgraph_clone_info
bitmap combined_args_to_skip;
};
+/* Function multiversioning info attached to a cgraph_node. */
+struct GTY(()) cgraph_function_version_info {
+ /* The cgraph_node for which the function version info is stored. */
+ struct cgraph_node *this_node;
+ /* Chains all the semantically identical function versions. The
+ first function in this chain is the version_info node of the
+ default function. */
+ struct cgraph_function_version_info *prev;
+ /* If this version node corresponds to a dispatcher for function
+ versions, this points to the version info node of the default
+ function, the first node in the chain. */
+ struct cgraph_function_version_info *next;
+ /* If this node corresponds to a function version, this points
+ to the dispatcher function decl, which is the function that must
+ be called to execute the right function version at run-time.
+ If this cgraph node is a dispatcher for function versions (that is,
+ dispatcher_function is set in its cgraph_node), this points to
+ the resolver function, which holds the function body of the
+ dispatcher. The dispatcher decl is an alias to the resolver
+ function decl. */
+ tree dispatcher_resolver;
+};
+
+/* Defined in cgraph.c */
+/* Get the cgraph_function_version_info node for NODE. */
+struct cgraph_function_version_info *
+ get_cgraph_node_version (struct cgraph_node *node);
+/* Map a new cgraph_function_version_info node for NODE. */
+struct cgraph_function_version_info *
+ insert_new_cgraph_node_version (struct cgraph_node *node);
+
/* The cgraph data structure.
Each function decl has assigned cgraph_node listing callees and callers. */
@@ -279,6 +310,8 @@ struct GTY(()) cgraph_node {
/* ?? We should be able to remove this. We have enough bits in
cgraph to calculate it. */
unsigned tm_clone : 1;
+ /* True if this decl is a dispatcher for function versions. */
+ unsigned dispatcher_function : 1;
};
DEF_VEC_P(symtab_node);
===================================================================
@@ -3476,6 +3476,12 @@ extern VEC(tree, gc) **decl_debug_args_insert (tre
#define DECL_FUNCTION_SPECIFIC_OPTIMIZATION(NODE) \
(FUNCTION_DECL_CHECK (NODE)->function_decl.function_specific_optimization)
+/* In FUNCTION_DECL, this is set if this function has other versions generated
+ using "target" attributes. The default version is the one which does not
+ have any "target" attribute set. */
+#define DECL_FUNCTION_VERSIONED(NODE)\
+ (FUNCTION_DECL_CHECK (NODE)->function_decl.versioned_function)
+
/* FUNCTION_DECL inherits from DECL_NON_COMMON because of the use of the
arguments/result/saved_tree fields by front ends. It was either inherit
FUNCTION_DECL from non_common, or inherit non_common from FUNCTION_DECL,
@@ -3520,8 +3526,8 @@ struct GTY(()) tree_function_decl {
unsigned looping_const_or_pure_flag : 1;
unsigned has_debug_args_flag : 1;
unsigned tm_clone_flag : 1;
-
- /* 1 bit left */
+ unsigned versioned_function : 1;
+ /* No bits left. */
};
/* The source language of the translation-unit. */
===================================================================
@@ -1087,6 +1087,31 @@ add_method (tree type, tree method, tree using_dec
|| same_type_p (TREE_TYPE (fn_type),
TREE_TYPE (method_type))))
{
+ /* For function versions, their parms and types match
+ but they are not duplicates. Record function versions
+ as and when they are found. */
+ if (TREE_CODE (fn) == FUNCTION_DECL
+ && TREE_CODE (method) == FUNCTION_DECL
+ && (DECL_FUNCTION_SPECIFIC_TARGET (fn)
+ || DECL_FUNCTION_SPECIFIC_TARGET (method))
+ && targetm.target_option.function_versions (fn, method))
+ {
+ /* Mark functions as versions if necessary. Modify the mangled
+ decl name if necessary. */
+ if (!DECL_FUNCTION_VERSIONED (fn))
+ {
+ DECL_FUNCTION_VERSIONED (fn) = 1;
+ if (DECL_ASSEMBLER_NAME_SET_P (fn))
+ mangle_decl (fn);
+ }
+ if (!DECL_FUNCTION_VERSIONED (method))
+ {
+ DECL_FUNCTION_VERSIONED (method) = 1;
+ if (DECL_ASSEMBLER_NAME_SET_P (method))
+ mangle_decl (method);
+ }
+ continue;
+ }
if (DECL_INHERITED_CTOR_BASE (method))
{
if (DECL_INHERITED_CTOR_BASE (fn))
@@ -6995,6 +7020,7 @@ resolve_address_of_overloaded_function (tree targe
tree matches = NULL_TREE;
tree fn;
tree target_fn_type;
+ VEC (tree, heap) *fn_ver_vec = NULL;
/* By the time we get here, we should be seeing only real
pointer-to-member types, not the internal POINTER_TYPE to
@@ -7059,9 +7085,19 @@ resolve_address_of_overloaded_function (tree targe
if (DECL_ANTICIPATED (fn))
continue;
- /* See if there's a match. */
+ /* See if there's a match. For functions that are multi-versioned,
+ all the versions match. */
if (same_type_p (target_fn_type, static_fn_type (fn)))
- matches = tree_cons (fn, NULL_TREE, matches);
+ {
+ matches = tree_cons (fn, NULL_TREE, matches);
+ /* If versioned, push all possible versions into a vector. */
+ if (DECL_FUNCTION_VERSIONED (fn))
+ {
+ if (fn_ver_vec == NULL)
+ fn_ver_vec = VEC_alloc (tree, heap, 2);
+ VEC_safe_push (tree, heap, fn_ver_vec, fn);
+ }
+ }
}
}
@@ -7149,13 +7185,26 @@ resolve_address_of_overloaded_function (tree targe
{
/* There were too many matches. First check if they're all
the same function. */
- tree match;
+ tree match = NULL_TREE;
fn = TREE_PURPOSE (matches);
- for (match = TREE_CHAIN (matches); match; match = TREE_CHAIN (match))
- if (!decls_match (fn, TREE_PURPOSE (match)))
- break;
+ /* For multi-versioned functions, more than one match is just fine.
+ Call decls_match to make sure they are different because they are
+ versioned. */
+ if (DECL_FUNCTION_VERSIONED (fn))
+ {
+ for (match = TREE_CHAIN (matches); match; match = TREE_CHAIN (match))
+ if (decls_match (fn, TREE_PURPOSE (match)))
+ break;
+ }
+ else
+ {
+ for (match = TREE_CHAIN (matches); match; match = TREE_CHAIN (match))
+ if (!decls_match (fn, TREE_PURPOSE (match)))
+ break;
+ }
+
if (match)
{
if (flags & tf_error)
@@ -7217,6 +7266,33 @@ resolve_address_of_overloaded_function (tree targe
perform_or_defer_access_check (access_path, fn, fn, flags);
}
+ /* If a pointer to a function that is multi-versioned is requested, the
+ pointer to the dispatcher function is returned instead. This works
+ well because indirectly calling the function will dispatch the right
+ function version at run-time. */
+ if (DECL_FUNCTION_VERSIONED (fn))
+ {
+ struct cgraph_node *node = NULL;
+ tree dispatcher_decl = NULL;
+ gcc_assert (fn_ver_vec != NULL);
+ gcc_assert (targetm.get_function_versions_dispatcher);
+ dispatcher_decl = targetm.get_function_versions_dispatcher (fn_ver_vec);
+ if (!dispatcher_decl)
+ {
+ error_at (input_location, "Pointer to a multiversioned function"
+ " without a default is not allowed");
+ return error_mark_node;
+ }
+ retrofit_lang_decl (dispatcher_decl);
+ mark_used (fn);
+ VEC_free (tree, heap, fn_ver_vec);
+ node = cgraph_get_create_node (dispatcher_decl);
+ gcc_assert (node != NULL);
+ /* Mark this function to be output. */
+ node->local.finalized = 1;
+ fn = dispatcher_decl;
+ }
+
if (TYPE_PTRFN_P (target_type) || TYPE_PTRMEMFUNC_P (target_type))
return cp_build_addr_expr (fn, flags);
else
===================================================================
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see
#include "pointer-set.h"
#include "splay-tree.h"
#include "plugin.h"
+#include "cgraph.h"
/* Possible cases of bad specifiers type used by bad_specifiers. */
enum bad_spec_place {
@@ -981,6 +982,29 @@ decls_match (tree newdecl, tree olddecl)
if (t1 != t2)
return 0;
+ /* The decls don't match if they correspond to two different versions
+ of the same function. */
+ if (compparms (p1, p2)
+ && same_type_p (TREE_TYPE (f1), TREE_TYPE (f2))
+ && targetm.target_option.function_versions (newdecl, olddecl))
+ {
+ /* Mark functions as versions if necessary. Modify the mangled decl
+ name if necessary. */
+ if (!DECL_FUNCTION_VERSIONED (newdecl))
+ {
+ DECL_FUNCTION_VERSIONED (newdecl) = 1;
+ if (DECL_ASSEMBLER_NAME_SET_P (newdecl))
+ mangle_decl (newdecl);
+ }
+ if (!DECL_FUNCTION_VERSIONED (olddecl))
+ {
+ DECL_FUNCTION_VERSIONED (olddecl) = 1;
+ if (DECL_ASSEMBLER_NAME_SET_P (olddecl))
+ mangle_decl (olddecl);
+ }
+ return 0;
+ }
+
if (CP_DECL_CONTEXT (newdecl) != CP_DECL_CONTEXT (olddecl)
&& ! (DECL_EXTERN_C_P (newdecl)
&& DECL_EXTERN_C_P (olddecl)))
@@ -1499,7 +1523,11 @@ duplicate_decls (tree newdecl, tree olddecl, bool
error ("previous declaration %q+#D here", olddecl);
return NULL_TREE;
}
- else if (compparms (TYPE_ARG_TYPES (TREE_TYPE (newdecl)),
+ /* For function versions, params and types match, but they
+ are not ambiguous. */
+ else if ((!DECL_FUNCTION_VERSIONED (newdecl)
+ && !DECL_FUNCTION_VERSIONED (olddecl))
+ && compparms (TYPE_ARG_TYPES (TREE_TYPE (newdecl)),
TYPE_ARG_TYPES (TREE_TYPE (olddecl))))
{
error ("new declaration %q#D", newdecl);
@@ -2272,6 +2300,11 @@ duplicate_decls (tree newdecl, tree olddecl, bool
else if (DECL_PRESERVE_P (newdecl))
DECL_PRESERVE_P (olddecl) = 1;
+ /* If the olddecl is a version, so is the newdecl. */
+ if (TREE_CODE (newdecl) == FUNCTION_DECL
+ && DECL_FUNCTION_VERSIONED (olddecl))
+ DECL_FUNCTION_VERSIONED (newdecl) = 1;
+
if (TREE_CODE (newdecl) == FUNCTION_DECL)
{
int function_size;
@@ -14227,7 +14260,11 @@ cxx_comdat_group (tree decl)
else
break;
}
- name = DECL_ASSEMBLER_NAME (decl);
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && DECL_FUNCTION_VERSIONED (decl))
+ name = DECL_NAME (decl);
+ else
+ name = DECL_ASSEMBLER_NAME (decl);
}
return name;
===================================================================
@@ -1541,8 +1541,16 @@ dump_exception_spec (tree t, int flags)
static void
dump_function_name (tree t, int flags)
{
- tree name = DECL_NAME (t);
+ tree name;
+ /* For function versions, use the assembler name as the decl name is
+ the same for all versions. */
+ if (TREE_CODE (t) == FUNCTION_DECL
+ && DECL_FUNCTION_VERSIONED (t))
+ name = DECL_ASSEMBLER_NAME (t);
+ else
+ name = DECL_NAME (t);
+
/* We can get here with a decl that was synthesized by language-
independent machinery (e.g. coverage.c) in which case it won't
have a lang_specific structure attached and DECL_CONSTRUCTOR_P
===================================================================
@@ -3813,8 +3813,11 @@ expand_or_defer_fn_1 (tree fn)
/* If the user wants us to keep all inline functions, then mark
this function as needed so that finish_file will make sure to
output it later. Similarly, all dllexport'd functions must
- be emitted; there may be callers in other DLLs. */
- if ((flag_keep_inline_functions
+ be emitted; there may be callers in other DLLs.
+ Also, mark this function as needed if it is marked inline but
+ is a multi-versioned function. */
+ if (((flag_keep_inline_functions
+ || DECL_FUNCTION_VERSIONED (fn))
&& DECL_DECLARED_INLINE_P (fn)
&& !DECL_REALLY_EXTERN (fn))
|| (flag_keep_inline_dllexport
===================================================================
@@ -674,9 +674,13 @@ check_classfn (tree ctype, tree function, tree tem
if (is_template != (TREE_CODE (fndecl) == TEMPLATE_DECL))
continue;
+ /* While finding a match, same types and params are not enough
+ if the function is versioned. Also check version ("target")
+ attributes. */
if (same_type_p (TREE_TYPE (TREE_TYPE (function)),
TREE_TYPE (TREE_TYPE (fndecl)))
&& compparms (p1, p2)
+ && !targetm.target_option.function_versions (function, fndecl)
&& (!is_template
|| comp_template_parms (template_parms,
DECL_TEMPLATE_PARMS (fndecl)))
===================================================================
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see
#include "langhooks.h"
#include "c-family/c-objc.h"
#include "timevar.h"
+#include "cgraph.h"
/* The various kinds of conversion. */
@@ -6444,6 +6445,42 @@ magic_varargs_p (tree fn)
return false;
}
+/* Returns the decl of the dispatcher function if FN is a function version.
+   FN must be a FUNCTION_DECL with DECL_FUNCTION_VERSIONED set.  Returns NULL
+   when FN has no cgraph node or no version info, and also (after reporting
+   an error) when the version info does not name a dispatcher.  */
+
+static tree
+get_function_version_dispatcher (tree fn)
+{
+  tree dispatcher_decl;
+  struct cgraph_node *node;
+  struct cgraph_function_version_info *node_version_info;
+
+  gcc_assert (TREE_CODE (fn) == FUNCTION_DECL
+              && DECL_FUNCTION_VERSIONED (fn));
+
+  node = cgraph_get_node (fn);
+  if (node == NULL)
+    return NULL;
+
+  node_version_info = get_cgraph_node_version (node);
+  if (node_version_info == NULL)
+    return NULL;
+
+  dispatcher_decl = node_version_info->dispatcher_resolver;
+  if (dispatcher_decl == NULL)
+    {
+      error_at (input_location, "Call to multiversioned function"
+                " without a default is not allowed");
+      return NULL;
+    }
+
+  /* DISPATCHER_DECL is known to be non-null from here on.  */
+  retrofit_lang_decl (dispatcher_decl);
+  return dispatcher_decl;
+}
+
/* Subroutine of the various build_*_call functions. Overload resolution
has chosen a winning candidate CAND; build up a CALL_EXPR accordingly.
ARGS is a TREE_LIST of the unconverted arguments to the call. FLAGS is a
@@ -6896,6 +6933,25 @@ build_over_call (struct z_candidate *cand, int fla
if (!already_used)
mark_used (fn);
+ /* For calls to a multi-versioned function, overload resolution
+ returns the function with the highest target priority, that is,
+ the version that will be checked for dispatching first. If this
+ version is inlinable, a direct call to this version can be made
+ otherwise the call should go through the dispatcher. */
+
+ if (DECL_FUNCTION_VERSIONED (fn)
+ && !targetm.target_option.can_inline_p (current_function_decl, fn))
+ {
+ struct cgraph_node *dispatcher_node = NULL;
+ fn = get_function_version_dispatcher (fn);
+ if (fn == NULL)
+ return NULL;
+ dispatcher_node = cgraph_get_create_node (fn);
+ gcc_assert (dispatcher_node != NULL);
+ /* Mark this function to be output. */
+ dispatcher_node->local.finalized = 1;
+ }
+
if (DECL_VINDEX (fn) && (flags & LOOKUP_NONVIRTUAL) == 0)
{
tree t;
@@ -8176,6 +8232,38 @@ joust (struct z_candidate *cand1, struct z_candida
&& (IS_TYPE_OR_DECL_P (cand1->fn)))
return 1;
+ /* For candidates of a multi-versioned function, make the version with
+ the highest priority win. This version will be checked for dispatching
+ first. If this version can be inlined into the caller, the front-end
+ will simply make a direct call to this function. */
+
+ if (TREE_CODE (cand1->fn) == FUNCTION_DECL
+ && DECL_FUNCTION_VERSIONED (cand1->fn)
+ && TREE_CODE (cand2->fn) == FUNCTION_DECL
+ && DECL_FUNCTION_VERSIONED (cand2->fn))
+ {
+ tree f1 = TREE_TYPE (cand1->fn);
+ tree f2 = TREE_TYPE (cand2->fn);
+ tree p1 = TYPE_ARG_TYPES (f1);
+ tree p2 = TYPE_ARG_TYPES (f2);
+
+ /* Check if cand1->fn and cand2->fn are versions of the same function. It
+ is possible that cand1->fn and cand2->fn are function versions but of
+ different functions. Check types to see if they are versions of the same
+ function. */
+ if (compparms (p1, p2)
+ && same_type_p (TREE_TYPE (f1), TREE_TYPE (f2)))
+ {
+ /* Always make the version with the higher priority, more
+ specialized, win. */
+ gcc_assert (targetm.compare_version_priority);
+ if (targetm.compare_version_priority (cand1->fn, cand2->fn) >= 0)
+ return 1;
+ else
+ return -1;
+ }
+ }
+
/* a viable function F1
is defined to be a better function than another viable function F2 if
for all arguments i, ICSi(F1) is not a worse conversion sequence than
@@ -8496,6 +8584,37 @@ tweak:
return 0;
}
+/* Function FN is multi-versioned and CANDIDATES contains the list of all
+   overloaded candidates for FN.  This function extracts all functions from
+   CANDIDATES that are function versions of FN (versioned FUNCTION_DECLs
+   whose parameter and return types match FN's) and asks the target hook
+   for the dispatcher of this multi-versioned function group.  */
+
+static void
+generate_function_versions_dispatcher (tree fn, struct z_candidate *candidates)
+{
+  tree f1 = TREE_TYPE (fn);
+  tree p1 = TYPE_ARG_TYPES (f1);
+  VEC (tree, heap) *fn_ver_vec = VEC_alloc (tree, heap, 2);
+  struct z_candidate *ver;
+
+  for (ver = candidates; ver; ver = ver->next)
+    {
+      /* As in joust, check TREE_CODE before using FUNCTION_DECL-only
+         accessors on a candidate; then require matching types.  */
+      if (TREE_CODE (ver->fn) == FUNCTION_DECL
+          && DECL_FUNCTION_VERSIONED (ver->fn)
+          && compparms (p1, TYPE_ARG_TYPES (TREE_TYPE (ver->fn)))
+          && same_type_p (TREE_TYPE (f1), TREE_TYPE (TREE_TYPE (ver->fn))))
+        VEC_safe_push (tree, heap, fn_ver_vec, ver->fn);
+    }
+
+  gcc_assert (targetm.get_function_versions_dispatcher);
+  /* The dispatcher decl returned by the hook is not needed here.  */
+  targetm.get_function_versions_dispatcher (fn_ver_vec);
+  VEC_free (tree, heap, fn_ver_vec);
+}
+
/* Given a list of candidates for overloading, find the best one, if any.
This algorithm has a worst case of O(2n) (winner is last), and a best
case of O(n/2) (totally ambiguous); much better than a sorting
@@ -8548,6 +8667,23 @@ tourney (struct z_candidate *candidates, tsubst_fl
return NULL;
}
+ /* For multiversioned functions, aggregate all the versions here for
+ generating the dispatcher body later if necessary. Check to see if
+ the dispatcher is already generated to avoid doing this more than
+ once. */
+
+ if (TREE_CODE (champ->fn) == FUNCTION_DECL
+ && DECL_FUNCTION_VERSIONED (champ->fn))
+ {
+ struct cgraph_node *champ_node = cgraph_get_node (champ->fn);
+ struct cgraph_function_version_info *champ_version_info = NULL;
+ if (champ_node != NULL)
+ champ_version_info = get_cgraph_node_version (champ_node);
+ if (champ_node == NULL
+ || champ_version_info == NULL
+ || champ_version_info->dispatcher_resolver == NULL)
+ generate_function_versions_dispatcher (champ->fn, candidates);
+ }
return champ;
}
===================================================================
@@ -62,6 +62,8 @@ along with GCC; see the file COPYING3. If not see
#include "opts.h"
#include "diagnostic.h"
#include "dumpfile.h"
+#include "tree-pass.h"
+#include "tree-flow.h"
enum upper_128bits_state
{
@@ -28413,6 +28415,1001 @@ ix86_init_mmx_sse_builtins (void)
}
}
+
+/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
+ to return a pointer to VERSION_DECL if the outcome of the expression
+ formed by PREDICATE_CHAIN is true. This function will be called during
+ version dispatch to decide which function version to execute. It returns
+ the basic block at the end, to which more conditions can be added.
+
+ PREDICATE_CHAIN is walked as a TREE_LIST: TREE_PURPOSE of each node is
+ the predicate function to call and TREE_VALUE is its single argument.
+ The version is selected only if the minimum of all the call results is
+ greater than zero. If PREDICATE_CHAIN is NULL_TREE the return is
+ emitted unconditionally and NEW_BB itself is returned. NEW_BB must not
+ be NULL. */
+
+static basic_block
+add_condition_to_bb (tree function_decl, tree version_decl,
+ tree predicate_chain, basic_block new_bb)
+{
+ gimple return_stmt;
+ tree convert_expr, result_var;
+ gimple convert_stmt;
+ gimple call_cond_stmt;
+ gimple if_else_stmt;
+
+ basic_block bb1, bb2, bb3;
+ edge e12, e23;
+
+ tree cond_var, and_expr_var = NULL_TREE;
+ gimple_seq gseq;
+
+ tree predicate_decl, predicate_arg;
+
+ /* All statements are built in the context of FUNCTION_DECL; the matching
+ pop_cfun precedes each return below. */
+ push_cfun (DECL_STRUCT_FUNCTION (function_decl));
+
+ gcc_assert (new_bb != NULL);
+ gseq = bb_seq (new_bb);
+
+
+ /* Build "result_var = (void *) &version_decl; return result_var;" once.
+ Both the unconditional and the conditional path append this pair. */
+ convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
+ build_fold_addr_expr (version_decl));
+ result_var = create_tmp_var (ptr_type_node, NULL);
+ convert_stmt = gimple_build_assign (result_var, convert_expr);
+ return_stmt = gimple_build_return (result_var);
+
+ /* No predicates: NEW_BB simply returns the address of VERSION_DECL. */
+ if (predicate_chain == NULL_TREE)
+ {
+ gimple_seq_add_stmt (&gseq, convert_stmt);
+ gimple_seq_add_stmt (&gseq, return_stmt);
+ set_bb_seq (new_bb, gseq);
+ gimple_set_bb (convert_stmt, new_bb);
+ gimple_set_bb (return_stmt, new_bb);
+ pop_cfun ();
+ return new_bb;
+ }
+
+ /* Emit one call per predicate, each into a fresh integer temporary, and
+ fold the results into AND_EXPR_VAR. */
+ while (predicate_chain != NULL)
+ {
+ cond_var = create_tmp_var (integer_type_node, NULL);
+ predicate_decl = TREE_PURPOSE (predicate_chain);
+ predicate_arg = TREE_VALUE (predicate_chain);
+ call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
+ gimple_call_set_lhs (call_cond_stmt, cond_var);
+
+ gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
+ gimple_set_bb (call_cond_stmt, new_bb);
+ gimple_seq_add_stmt (&gseq, call_cond_stmt);
+
+ predicate_chain = TREE_CHAIN (predicate_chain);
+
+ /* The first predicate's temporary doubles as the accumulator. */
+ if (and_expr_var == NULL)
+ and_expr_var = cond_var;
+ else
+ {
+ gimple assign_stmt;
+ /* Accumulate with MIN_EXPR: the running value stays greater than
+ zero only while every predicate result so far was greater than
+ zero.
+ and_expr_var = min_expr <cond_var, and_expr_var> */
+ assign_stmt = gimple_build_assign (and_expr_var,
+ build2 (MIN_EXPR, integer_type_node,
+ cond_var, and_expr_var));
+
+ gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
+ gimple_set_bb (assign_stmt, new_bb);
+ gimple_seq_add_stmt (&gseq, assign_stmt);
+ }
+ }
+
+ /* Branch on "and_expr_var > 0". */
+ if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
+ integer_zero_node,
+ NULL_TREE, NULL_TREE);
+ gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
+ gimple_set_bb (if_else_stmt, new_bb);
+ gimple_seq_add_stmt (&gseq, if_else_stmt);
+
+ gimple_seq_add_stmt (&gseq, convert_stmt);
+ gimple_seq_add_stmt (&gseq, return_stmt);
+ set_bb_seq (new_bb, gseq);
+
+ /* Split right after the condition: BB1 ends with the test and BB2, taken
+ on the true edge, holds the conversion and the return. */
+ bb1 = new_bb;
+ e12 = split_block (bb1, if_else_stmt);
+ bb2 = e12->dest;
+ e12->flags &= ~EDGE_FALLTHRU;
+ e12->flags |= EDGE_TRUE_VALUE;
+
+ /* Split again after the return; the block that follows becomes BB3. */
+ e23 = split_block (bb2, return_stmt);
+
+ gimple_set_bb (convert_stmt, bb2);
+ gimple_set_bb (return_stmt, bb2);
+
+ /* BB3 is entered from BB1 when the test is false. */
+ bb3 = e23->dest;
+ make_edge (bb1, bb3, EDGE_FALSE_VALUE);
+
+ /* BB2 ends in a return, so it leads to the exit block, not to BB3. */
+ remove_edge (e23);
+ make_edge (bb2, EXIT_BLOCK_PTR, 0);
+
+ pop_cfun ();
+
+ return bb3;
+}
+
+/* This parses the attribute arguments to target in DECL and determines
+   the right builtin to use to match the platform specification.
+
+   DECL must carry a "target" attribute whose first argument is a
+   STRING_CST (both are asserted).  The string is treated as a comma
+   separated list that may hold an "arch=" entry and any of the ISA
+   names in feature_list below.
+
+   Returns the dispatch priority of this version, that is the highest
+   priority among the recognized ISA names and the processor selected by
+   "arch=", or 0 once an error has been reported.
+
+   If PREDICATE_LIST is not NULL, *PREDICATE_LIST receives a TREE_LIST
+   with one node per check to perform before dispatching to DECL:
+   TREE_PURPOSE is the IX86_BUILTIN_CPU_IS or IX86_BUILTIN_CPU_SUPPORTS
+   builtin decl and TREE_VALUE is its string argument.  In that mode an
+   entry with no matching predicate is diagnosed as an error.  When
+   PREDICATE_LIST is NULL such entries are skipped and only the priority
+   is computed.  */
+
+static unsigned int
+get_builtin_code_for_version (tree decl, tree *predicate_list)
+{
+  tree attrs;
+  struct cl_target_option cur_target;
+  tree target_node;
+  struct cl_target_option *new_target;
+  const char *arg_str = NULL;
+  const char *attrs_str = NULL;
+  char *tok_str = NULL;
+  char *token;
+
+  /* Priority of i386 features, greater value is higher priority.   This is
+     used to decide the order in which function dispatch must happen.  For
+     instance, a version specialized for SSE4.2 should be checked for dispatch
+     before a version for SSE3, as SSE4.2 implies SSE3.  */
+  enum feature_priority
+  {
+    P_ZERO = 0,
+    P_MMX,
+    P_SSE,
+    P_SSE2,
+    P_SSE3,
+    P_SSSE3,
+    P_PROC_SSSE3,
+    P_SSE4_a,
+    P_PROC_SSE4_a,
+    P_SSE4_1,
+    P_SSE4_2,
+    P_PROC_SSE4_2,
+    P_POPCNT,
+    P_AVX,
+    P_AVX2,
+    P_FMA,
+    P_PROC_FMA
+  };
+
+  enum feature_priority priority = P_ZERO;
+
+  /* These are the target attribute strings for which a dispatcher is
+     available, from fold_builtin_cpu.  */
+
+  static struct _feature_list
+    {
+      const char *const name;
+      const enum feature_priority priority;
+    }
+  const feature_list[] =
+    {
+      {"mmx", P_MMX},
+      {"sse", P_SSE},
+      {"sse2", P_SSE2},
+      {"sse3", P_SSE3},
+      {"ssse3", P_SSSE3},
+      {"sse4.1", P_SSE4_1},
+      {"sse4.2", P_SSE4_2},
+      {"popcnt", P_POPCNT},
+      {"avx", P_AVX},
+      {"avx2", P_AVX2}
+    };
+
+
+  static const unsigned int NUM_FEATURES
+    = sizeof (feature_list) / sizeof (struct _feature_list);
+
+  unsigned int i;
+
+  tree predicate_chain = NULL_TREE;
+  tree predicate_decl, predicate_arg;
+
+  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
+  gcc_assert (attrs != NULL);
+
+  attrs = TREE_VALUE (TREE_VALUE (attrs));
+
+  gcc_assert (TREE_CODE (attrs) == STRING_CST);
+  attrs_str = TREE_STRING_POINTER (attrs);
+
+
+  /* Handle arch= if specified.  For priority, set it to be 1 more than
+     the best instruction set the processor can handle.  For instance, if
+     there is a version for atom and a version for ssse3 (the highest ISA
+     priority for atom), the atom version must be checked for dispatch
+     before the ssse3 version. */
+  if (strstr (attrs_str, "arch=") != NULL)
+    {
+      /* The global options are saved before the attribute is evaluated
+	 and restored afterwards.  */
+      cl_target_option_save (&cur_target, &global_options);
+      target_node = ix86_valid_target_attribute_tree (attrs);
+
+      gcc_assert (target_node);
+      new_target = TREE_TARGET_OPTION (target_node);
+      gcc_assert (new_target);
+
+      if (new_target->arch_specified && new_target->arch > 0)
+	{
+	  switch (new_target->arch)
+	    {
+	    case PROCESSOR_CORE2_32:
+	    case PROCESSOR_CORE2_64:
+	      arg_str = "core2";
+	      priority = P_PROC_SSSE3;
+	      break;
+	    case PROCESSOR_COREI7_32:
+	    case PROCESSOR_COREI7_64:
+	      arg_str = "corei7";
+	      priority = P_PROC_SSE4_2;
+	      break;
+	    case PROCESSOR_ATOM:
+	      arg_str = "atom";
+	      priority = P_PROC_SSSE3;
+	      break;
+	    case PROCESSOR_AMDFAM10:
+	      arg_str = "amdfam10h";
+	      priority = P_PROC_SSE4_a;
+	      break;
+	    case PROCESSOR_BDVER1:
+	      arg_str = "bdver1";
+	      priority = P_PROC_FMA;
+	      break;
+	    case PROCESSOR_BDVER2:
+	      arg_str = "bdver2";
+	      priority = P_PROC_FMA;
+	      break;
+	    default:
+	      /* No dispatcher for this processor; ARG_STR stays NULL.  */
+	      break;
+	    }
+	}
+
+      cl_target_option_restore (&global_options, &cur_target);
+
+      if (predicate_list && arg_str == NULL)
+	{
+	  error_at (DECL_SOURCE_LOCATION (decl),
+		    "No dispatcher found for the versioning attributes");
+	  return 0;
+	}
+
+      if (predicate_list)
+	{
+	  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
+	  /* For a C string literal the length includes the trailing NULL.  */
+	  predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
+	  predicate_chain = tree_cons (predicate_decl, predicate_arg,
+				       predicate_chain);
+	}
+    }
+
+  /* Process feature name.  strtok writes into its argument, so tokenize
+     a private copy of the attribute string.  */
+  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
+  strcpy (tok_str, attrs_str);
+  token = strtok (tok_str, ",");
+  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
+
+  while (token != NULL)
+    {
+      /* Do not process "arch="  */
+      if (strncmp (token, "arch=", 5) == 0)
+	{
+	  token = strtok (NULL, ",");
+	  continue;
+	}
+      for (i = 0; i < NUM_FEATURES; ++i)
+	{
+	  if (strcmp (token, feature_list[i].name) == 0)
+	    {
+	      if (predicate_list)
+		{
+		  predicate_arg = build_string_literal (
+				  strlen (feature_list[i].name) + 1,
+				  feature_list[i].name);
+		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
+					       predicate_chain);
+		}
+	      /* Find the maximum priority feature.  */
+	      if (feature_list[i].priority > priority)
+		priority = feature_list[i].priority;
+
+	      break;
+	    }
+	}
+      if (predicate_list && i == NUM_FEATURES)
+	{
+	  error_at (DECL_SOURCE_LOCATION (decl),
+		    "No dispatcher found for %s", token);
+	  /* TOKEN points into TOK_STR, so release the copy only after the
+	     diagnostic has been issued.  */
+	  free (tok_str);
+	  return 0;
+	}
+      token = strtok (NULL, ",");
+    }
+  free (tok_str);
+
+  if (predicate_list && predicate_chain == NULL_TREE)
+    {
+      error_at (DECL_SOURCE_LOCATION (decl),
+		"No dispatcher found for the versioning attributes : %s",
+		attrs_str);
+      return 0;
+    }
+  else if (predicate_list)
+    {
+      /* tree_cons prepends, so reverse the list to restore the order in
+	 which the predicates were found.  */
+      predicate_chain = nreverse (predicate_chain);
+      *predicate_list = predicate_chain;
+    }
+
+  return priority;
+}
+
+/* Rank the function versions DECL1 and DECL2 by the priority of their
+   target features.  A decl without a "target" attribute has priority 0.
+   The result is positive when DECL1 has the higher priority, negative
+   when DECL2 has, and 0 when the two priorities are equal.  */
+
+static int
+ix86_compare_version_priority (tree decl1, tree decl2)
+{
+  tree decls[2];
+  unsigned int rank[2] = { 0, 0 };
+  int k;
+
+  decls[0] = decl1;
+  decls[1] = decl2;
+
+  /* Only the priority is wanted here, so no predicate list is requested.  */
+  for (k = 0; k < 2; k++)
+    if (lookup_attribute ("target", DECL_ATTRIBUTES (decls[k])) != NULL)
+      rank[k] = get_builtin_code_for_version (decls[k], NULL);
+
+  return (int) rank[0] - (int) rank[1];
+}
+
+/* qsort comparator for function versions.  V1 and V2 point to function
+   version records with a dispatch priority based on the target ISA.
+   Returns a positive value if V2 has the higher priority, a negative
+   value if V1 has, and 0 if they are equal, so that sorting yields
+   descending priority order.
+
+   The record layout below has to stay identical to the struct that
+   dispatch_function_versions passes to qsort.  */
+
+static int
+feature_compare (const void *v1, const void *v2)
+{
+  typedef struct _function_version_info
+    {
+      tree version_decl;
+      tree predicate_chain;
+      unsigned int dispatch_priority;
+    } function_version_info;
+
+  const function_version_info *c1 = (const function_version_info *) v1;
+  const function_version_info *c2 = (const function_version_info *) v2;
+
+  /* Compare explicitly rather than subtracting: the priorities are
+     unsigned, so a difference would wrap around before being converted
+     to the int result.  */
+  if (c2->dispatch_priority > c1->dispatch_priority)
+    return 1;
+  if (c2->dispatch_priority < c1->dispatch_priority)
+    return -1;
+  return 0;
+}
+
+/* This function generates the dispatch function for
+   multi-versioned functions.  DISPATCH_DECL is the function which will
+   contain the dispatch logic.  FNDECLS_P points to a VEC (tree, heap) of
+   the function choices for dispatch; element 0 is the default version
+   and there must be at least one other version.  EMPTY_BB is the basic
+   block pointer in DISPATCH_DECL in which the dispatch code is
+   generated; on return it holds the block produced by the last
+   add_condition_to_bb call.  Always returns 0.  */
+
+static int
+dispatch_function_versions (tree dispatch_decl,
+			    void *fndecls_p,
+			    basic_block *empty_bb)
+{
+  tree default_decl;
+  gimple ifunc_cpu_init_stmt;
+  gimple_seq gseq;
+  int ix;
+  tree ele;
+  VEC (tree, heap) *fndecls;
+  unsigned int num_versions = 0;
+  unsigned int actual_versions = 0;
+  unsigned int i;
+
+  /* Must match the record layout that feature_compare expects.  */
+  struct _function_version_info
+    {
+      tree version_decl;
+      tree predicate_chain;
+      unsigned int dispatch_priority;
+    }*function_version_info;
+
+  gcc_assert (dispatch_decl != NULL
+	      && fndecls_p != NULL
+	      && empty_bb != NULL);
+
+  /*fndecls_p is actually a vector.  */
+  fndecls = (VEC (tree, heap) *)fndecls_p;
+
+  /* At least one more version other than the default.  */
+  num_versions = VEC_length (tree, fndecls);
+  gcc_assert (num_versions >= 2);
+
+  function_version_info
+    = XNEWVEC (struct _function_version_info, (num_versions - 1));
+
+  /* The first version in the vector is the default decl.  */
+  default_decl = VEC_index (tree, fndecls, 0);
+
+  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
+
+  gseq = bb_seq (*empty_bb);
+  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
+     constructors, so explicitly call __builtin_cpu_init here.  */
+  ifunc_cpu_init_stmt = gimple_build_call_vec (
+                     ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], NULL);
+  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
+  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
+  set_bb_seq (*empty_bb, gseq);
+
+  pop_cfun ();
+
+
+  for (ix = 1; VEC_iterate (tree, fndecls, ix, ele); ++ix)
+    {
+      tree version_decl = ele;
+      tree predicate_chain = NULL_TREE;
+      unsigned int priority;
+      /* Get attribute string, parse it and find the right predicate decl.
+         The predicate function could be a lengthy combination of many
+	 features, like arch-type and various isa-variants.  */
+      priority = get_builtin_code_for_version (version_decl,
+	 			               &predicate_chain);
+
+      /* A version without predicates cannot be dispatched to; skip it.  */
+      if (predicate_chain == NULL_TREE)
+	continue;
+
+      /* Store at the next free slot, not at IX - 1: the sort and the
+	 dispatch loop below only look at the first ACTUAL_VERSIONS
+	 entries, so a skipped version must not leave a gap.  */
+      function_version_info [actual_versions].version_decl = version_decl;
+      function_version_info [actual_versions].predicate_chain
+	= predicate_chain;
+      function_version_info [actual_versions].dispatch_priority = priority;
+      actual_versions++;
+    }
+
+  /* Sort the versions according to descending order of dispatch priority.  The
+     priority is based on the ISA.  This is not a perfect solution.  There
+     could still be ambiguity.  If more than one function version is suitable
+     to execute,  which one should be dispatched?  In future, allow the user
+     to specify a dispatch  priority next to the version.  */
+  qsort (function_version_info, actual_versions,
+         sizeof (struct _function_version_info), feature_compare);
+
+  for  (i = 0; i < actual_versions; ++i)
+    *empty_bb = add_condition_to_bb (dispatch_decl,
+				     function_version_info[i].version_decl,
+				     function_version_info[i].predicate_chain,
+				     *empty_bb);
+
+  /* dispatch default version at the end.  */
+  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
+				   NULL, *empty_bb);
+
+  free (function_version_info);
+  return 0;
+}
+
+/* Return true if FN1 and FN2 are versions of the same function, meaning
+   that the target options of the two decls differ.  The caller is
+   expected to have checked that FN1 and FN2 have the same signature.  */
+
+static bool
+ix86_function_versions (tree fn1, tree fn2)
+{
+  tree opts_a, opts_b;
+  struct cl_target_option *tgt_a, *tgt_b;
+
+  if (TREE_CODE (fn1) != FUNCTION_DECL)
+    return false;
+  if (TREE_CODE (fn2) != FUNCTION_DECL)
+    return false;
+
+  opts_a = DECL_FUNCTION_SPECIFIC_TARGET (fn1);
+  opts_b = DECL_FUNCTION_SPECIFIC_TARGET (fn2);
+
+  /* At least one function decl should have target attribute specified.  */
+  if (opts_a == NULL_TREE && opts_b == NULL_TREE)
+    return false;
+
+  /* At most one side lacks options at this point; compare that side
+     using the default target options.  */
+  if (opts_a == NULL_TREE)
+    opts_a = target_option_default_node;
+  else if (opts_b == NULL_TREE)
+    opts_b = target_option_default_node;
+
+  tgt_a = TREE_TARGET_OPTION (opts_a);
+  tgt_b = TREE_TARGET_OPTION (opts_b);
+
+  /* The two are versions as soon as any one of the compared fields
+     differs.  */
+  return (tgt_a->x_ix86_isa_flags != tgt_b->x_ix86_isa_flags
+	  || tgt_a->x_target_flags != tgt_b->x_target_flags
+	  || tgt_a->arch != tgt_b->arch
+	  || tgt_a->tune != tgt_b->tune
+	  || tgt_a->x_ix86_fpmath != tgt_b->x_ix86_fpmath
+	  || tgt_a->branch_cost != tgt_b->branch_cost);
+}
+
+/* qsort comparator for an array of C strings, used to sort the
+   individual arguments of a "target" attribute.  V1 and V2 each point
+   to a char * element; the result is that of strcmp on the two
+   strings.  */
+
+static int
+attr_strcmp (const void *v1, const void *v2)
+{
+  return strcmp (*(char *const *) v1, *(char *const *) v2);
+}
+
+/* STR is the argument to target attribute.  This function tokenizes
+   the comma separated arguments, sorts them and returns a string which
+   is a unique identifier for the comma separated arguments.   It also
+   replaces non-identifier characters "=,-" with "_": '=' and '-' are
+   rewritten in place and the sorted arguments are joined with '_'
+   instead of ','.
+
+   Empty arguments (as in "a,,b" or a leading or trailing comma) are
+   dropped.  The result is allocated with xmalloc and is owned by the
+   caller, who must free it.  */
+
+static char *
+sorted_attr_string (const char *str)
+{
+  char **args = NULL;
+  char *attr_str, *ret_str;
+  char *attr = NULL;
+  unsigned int argnum = 1;
+  unsigned int ntokens = 0;
+  unsigned int i;
+  const size_t len = strlen (str);
+
+  /* Upper bound on the number of arguments: one more than the commas.  */
+  for (i = 0; i < len; i++)
+    if (str[i] == ',')
+      argnum++;
+
+  attr_str = (char *) xmalloc (len + 1);
+  strcpy (attr_str, str);
+
+  /* Replace "=,-" with "_".  */
+  for (i = 0; i < len; i++)
+    if (attr_str[i] == '=' || attr_str[i] == '-')
+      attr_str[i] = '_';
+
+  if (argnum == 1)
+    return attr_str;
+
+  args = XNEWVEC (char *, argnum);
+
+  /* strtok skips empty fields, so it may yield fewer than ARGNUM tokens.
+     Count what was actually stored and use only that many entries below;
+     the remaining slots of ARGS are uninitialized.  */
+  for (attr = strtok (attr_str, ","); attr != NULL;
+       attr = strtok (NULL, ","))
+    args[ntokens++] = attr;
+
+  qsort (args, ntokens, sizeof (char *), attr_strcmp);
+
+  /* The joined tokens and separators never exceed the input length.  */
+  ret_str = (char *) xmalloc (len + 1);
+  ret_str[0] = '\0';
+  for (i = 0; i < ntokens; i++)
+    {
+      if (i > 0)
+	strcat (ret_str, "_");
+      strcat (ret_str, args[i]);
+    }
+
+  free (args);
+  free (attr_str);
+  return ret_str;
+}
+
+/* This function changes the assembler name for functions that are
+   versions.  If DECL is a function version and has a "target"
+   attribute, it appends the attribute string to its assembler name.
+
+   ID is the assembler name identifier to start from.  The result is ID
+   itself when DECL has no "target" attribute, otherwise the identifier
+   for "<ID>.<sorted attribute string>".  Versions that are gnu_inline
+   or virtual are diagnosed as errors.  */
+
+static tree
+ix86_mangle_function_version_assembler_name (tree decl, tree id)
+{
+  tree version_attr;
+  tree mangled_id;
+  const char *orig_name, *version_string;
+  char *attr_str;
+  char *assembler_name;
+
+  if (DECL_DECLARED_INLINE_P (decl)
+      && lookup_attribute ("gnu_inline",
+			   DECL_ATTRIBUTES (decl)))
+    error_at (DECL_SOURCE_LOCATION (decl),
+	      "Function versions cannot be marked as gnu_inline,"
+	      " bodies have to be generated");
+
+  /* Diagnostic messages carry no trailing newline.  */
+  if (DECL_VIRTUAL_P (decl)
+      || DECL_VINDEX (decl))
+    error_at (DECL_SOURCE_LOCATION (decl),
+	      "Virtual function versioning not supported");
+
+  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
+
+  /* target attribute string is NULL for default functions.  */
+  if (version_attr == NULL_TREE)
+    return id;
+
+  orig_name = IDENTIFIER_POINTER (id);
+  version_string
+    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
+
+  /* Sorting the arguments makes the suffix independent of the order in
+     which they were written in the attribute.  */
+  attr_str = sorted_attr_string (version_string);
+  /* Room for both strings, the '.' separator and the terminating NUL.  */
+  assembler_name = (char *) xmalloc (strlen (orig_name)
+				     + strlen (attr_str) + 2);
+
+  sprintf (assembler_name, "%s.%s", orig_name, attr_str);
+
+  /* Allow assembler name to be modified if already set.  */
+  if (DECL_ASSEMBLER_NAME_SET_P (decl))
+    SET_DECL_RTL (decl, NULL);
+
+  /* get_identifier keeps its own copy of the string, so both temporary
+     buffers can be released once the identifier exists.  */
+  mangled_id = get_identifier (assembler_name);
+  free (attr_str);
+  free (assembler_name);
+
+  return mangled_id;
+}
+
+/* Return the assembler name identifier to use for DECL, given the
+   proposed identifier ID.  Versioned functions get the target suffix
+   appended; every other decl keeps ID unchanged.  */
+
+static tree
+ix86_mangle_decl_assembler_name (tree decl, tree id)
+{
+  bool is_version = (TREE_CODE (decl) == FUNCTION_DECL
+		     && DECL_FUNCTION_VERSIONED (decl));
+
+  if (!is_version)
+    return id;
+
+  return ix86_mangle_function_version_assembler_name (decl, id);
+}
+
+/* Build "<assembler name of DECL>.<SUFFIX>" in a freshly allocated
+   buffer and return it.  When MAKE_UNIQUE is true, the identifier
+   returned by get_file_function_name is inserted between the two parts
+   so that the name can be used globally without colliding at link
+   time.  */
+
+static char *
+make_name (tree decl, const char *suffix, bool make_unique)
+{
+  const char *base = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+  const char *file_tag = NULL;
+  char *buf;
+  int buf_len;
+
+  /* BASE, SUFFIX, one '.' and the terminating NUL.  */
+  if (!make_unique)
+    {
+      buf_len = strlen (base) + strlen (suffix) + 2;
+      buf = XNEWVEC (char, buf_len);
+      /* Use '.' to concatenate names as it is demangler friendly.  */
+      snprintf (buf, buf_len, "%s.%s", base, suffix);
+      return buf;
+    }
+
+  file_tag = IDENTIFIER_POINTER (get_file_function_name ("\0"));
+
+  /* As above, plus the file tag and a second '.'.  */
+  buf_len = strlen (base) + strlen (suffix) + 2;
+  buf_len += strlen (file_tag) + 1;
+  buf = XNEWVEC (char, buf_len);
+  snprintf (buf, buf_len, "%s.%s.%s", base, file_tag, suffix);
+
+  return buf;
+}
+
+/* Make a dispatcher declaration for the multi-versioned function DECL.
+   Calls to DECL function will be replaced with calls to the dispatcher
+   by the front-end.  Return the decl created.
+
+   The dispatcher has the same return and argument types as DECL and is
+   named "<assembler name of DECL>.ifunc"; the name is made unique when
+   DECL is not public.  */
+
+static tree
+make_dispatcher_decl (const tree decl)
+{
+  tree func_decl;
+  char *func_name;
+  tree fn_type, func_type;
+  bool is_uniq = false;
+
+  if (TREE_PUBLIC (decl) == 0)
+    is_uniq = true;
+
+  func_name = make_name (decl, "ifunc", is_uniq);
+
+  fn_type = TREE_TYPE (decl);
+  func_type = build_function_type (TREE_TYPE (fn_type),
+				   TYPE_ARG_TYPES (fn_type));
+
+  func_decl = build_fn_decl (func_name, func_type);
+  /* The decl holds an identifier with its own copy of the name, so the
+     buffer from make_name is no longer needed.  */
+  free (func_name);
+
+  TREE_USED (func_decl) = 1;
+  DECL_CONTEXT (func_decl) = NULL_TREE;
+  DECL_INITIAL (func_decl) = error_mark_node;
+  DECL_ARTIFICIAL (func_decl) = 1;
+  /* Mark this func as external, the resolver will flip it again if
+     it gets generated.  */
+  DECL_EXTERNAL (func_decl) = 1;
+  /* This will be an IFUNC, and IFUNCs have to be externally visible.  */
+  TREE_PUBLIC (func_decl) = 1;
+
+  return func_decl;
+}
+
+/* Return true if DECL is the default version of a multi-versioned
+   function, that is a versioned FUNCTION_DECL that carries no
+   function-specific target options.  */
+
+static bool
+is_function_default_version (const tree decl)
+{
+  if (TREE_CODE (decl) != FUNCTION_DECL)
+    return false;
+
+  if (!DECL_FUNCTION_VERSIONED (decl))
+    return false;
+
+  return DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL_TREE;
+}
+
+/* Make a dispatcher declaration for the multi-versioned function DECL.
+   Calls to DECL function will be replaced with calls to the dispatcher
+   by the front-end.  It also chains the cgraph nodes of all the
+   semantically identical versions in vector FN_VER_VEC_P.  Returns the
+   decl of the dispatcher function.
+
+   FN_VER_VEC_P points to a VEC (tree, heap) of the version decls and
+   must not be NULL.  NULL is returned when the vector has no default
+   version, or when dispatching via ifunc is not available in this
+   configuration (an error is reported in that case).  If the default
+   version already has a dispatcher, that one is returned.  */
+
+static tree
+ix86_get_function_versions_dispatcher (void *fn_ver_vec_p)
+{
+  struct cgraph_node *node = NULL;
+  struct cgraph_node *default_node = NULL;
+  struct cgraph_node *dispatcher_node = NULL;
+
+  struct cgraph_function_version_info *default_version_info = NULL;
+  struct cgraph_function_version_info *dispatcher_version_info = NULL;
+  struct cgraph_function_version_info *node_version_info = NULL;
+
+  int ix;
+  tree ele;
+  tree dispatch_decl = NULL;
+  VEC (tree, heap) *fn_ver_vec = NULL;
+
+  fn_ver_vec = (VEC (tree,heap) *) fn_ver_vec_p;
+  gcc_assert (fn_ver_vec != NULL);
+
+  /* Find the default version.  */
+  for (ix = 0; VEC_iterate (tree, fn_ver_vec, ix, ele); ++ix)
+    {
+      if (is_function_default_version (ele))
+	{
+	  default_node = cgraph_get_create_node (ele);
+	  break;
+	}
+    }
+
+  /* If there is no default node, just return NULL.  */
+  if (!default_node)
+    return NULL;
+
+  default_version_info = get_cgraph_node_version (default_node);
+
+  /* If the dispatcher is already there, return it.  */
+  if (default_version_info && default_version_info->dispatcher_resolver)
+    return default_version_info->dispatcher_resolver;
+
+  if (default_version_info == NULL)
+    default_version_info = insert_new_cgraph_node_version (default_node);
+
+#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
+  /* Right now, the dispatching is done via ifunc.  */
+  dispatch_decl = make_dispatcher_decl (default_node->symbol.decl);
+#else
+  error_at (DECL_SOURCE_LOCATION (default_node->symbol.decl),
+	    "Multiversioning needs ifunc which is not supported "
+	    "in this configuration");
+#endif
+
+  /* Without ifunc support no dispatcher decl exists.  The error has
+     been reported above; stop here rather than creating a cgraph node
+     for a NULL decl.  */
+  if (dispatch_decl == NULL)
+    return NULL;
+
+  default_version_info->dispatcher_resolver = dispatch_decl;
+  dispatcher_node = cgraph_get_create_node (dispatch_decl);
+  gcc_assert (dispatcher_node);
+  dispatcher_node->dispatcher_function = 1;
+  dispatcher_version_info = insert_new_cgraph_node_version (dispatcher_node);
+  cgraph_mark_address_taken_node (default_node);
+
+  for (ix = 0; VEC_iterate (tree, fn_ver_vec, ix, ele); ++ix)
+    {
+      node = cgraph_get_create_node (ele);
+      gcc_assert (node != NULL && DECL_FUNCTION_VERSIONED (ele));
+
+      /* The default version is linked in after the loop.  */
+      if (node == default_node)
+	continue;
+
+      node_version_info = get_cgraph_node_version (node);
+      if (node_version_info == NULL)
+	node_version_info = insert_new_cgraph_node_version (node);
+
+      gcc_assert (DECL_FUNCTION_SPECIFIC_TARGET (ele) != NULL_TREE);
+
+      /* Chain all the cgraph_function_version_info nodes that are
+	 semantically identical.  Each node is inserted right behind the
+	 dispatcher, in front of the nodes chained so far.  */
+      if (dispatcher_version_info->next)
+	{
+	  node_version_info->next = dispatcher_version_info->next;
+	  dispatcher_version_info->next->prev = node_version_info;
+	}
+
+      dispatcher_version_info->next = node_version_info;
+      node_version_info->prev = dispatcher_version_info;
+      node_version_info->dispatcher_resolver = dispatch_decl;
+    }
+
+  /* The default version should be the first node.  */
+  default_version_info->next = dispatcher_version_info->next;
+  dispatcher_version_info->next->prev = default_version_info;
+
+  /* The dispatcher node should directly point to the default node.  */
+  dispatcher_version_info->next = default_version_info;
+
+  return dispatch_decl;
+}
+
+/* Build an attribute node of the form NAME (ARG_NAME) and prepend it to
+   the attribute list CHAIN.  ARG_NAME becomes the attribute's only
+   argument, as a string node.  Returns the new head of the list.  */
+
+static tree
+make_attribute (const char *name, const char *arg_name, tree chain)
+{
+  tree id = get_identifier (name);
+  tree arg = build_string (strlen (arg_name), arg_name);
+  tree arg_list = tree_cons (NULL_TREE, arg, NULL_TREE);
+
+  return tree_cons (id, arg_list, chain);
+}
+
+/* Make the resolver function decl to dispatch the versions of
+   a multi-versioned function,  DEFAULT_DECL.  Create an
+   empty basic block in the resolver and store the pointer in
+   EMPTY_BB.  Return the decl of the resolver function.
+
+   DISPATCH_DECL is the dispatcher decl; it must not be NULL.  It is
+   marked as no longer external, given an "ifunc" attribute naming the
+   resolver, and made an alias of the resolver.  */
+
+static tree
+make_resolver_func (const tree default_decl,
+		    const tree dispatch_decl,
+		    basic_block *empty_bb)
+{
+  char *resolver_name;
+  tree decl, type, decl_name, t;
+  basic_block new_bb;
+  bool is_uniq = false;
+
+  /* IFUNC's have to be globally visible.  So, if the default_decl is
+     not, then the name of the IFUNC should be made unique.  */
+  if (TREE_PUBLIC (default_decl) == 0)
+    is_uniq = true;
+
+  /* Append the filename to the resolver function if the versions are
+     not externally visible.  This is because the resolver function has
+     to be externally visible for the loader to find it.  So, appending
+     the filename will prevent conflicts with a resolver function from
+     another module which is based on the same version name.  */
+  resolver_name = make_name (default_decl, "resolver", is_uniq);
+
+  /* The resolver function should return a (void *). */
+  type = build_function_type_list (ptr_type_node, NULL_TREE);
+
+  decl = build_fn_decl (resolver_name, type);
+  decl_name = get_identifier (resolver_name);
+  SET_DECL_ASSEMBLER_NAME (decl, decl_name);
+
+  DECL_NAME (decl) = decl_name;
+  TREE_USED (decl) = 1;
+  DECL_ARTIFICIAL (decl) = 1;
+  DECL_IGNORED_P (decl) = 0;
+  /* IFUNC resolvers have to be externally visible.  */
+  TREE_PUBLIC (decl) = 1;
+  DECL_UNINLINABLE (decl) = 1;
+
+  /* Both the resolver and the dispatcher are defined from here on.  */
+  DECL_EXTERNAL (decl) = 0;
+  DECL_EXTERNAL (dispatch_decl) = 0;
+
+  DECL_CONTEXT (decl) = NULL_TREE;
+  DECL_INITIAL (decl) = make_node (BLOCK);
+  DECL_STATIC_CONSTRUCTOR (decl) = 0;
+  TREE_READONLY (decl) = 0;
+  DECL_PURE_P (decl) = 0;
+
+  if (DECL_COMDAT_GROUP (default_decl))
+    {
+      DECL_COMDAT (decl) = DECL_COMDAT (default_decl);
+      make_decl_one_only (decl, DECL_COMDAT_GROUP (default_decl));
+    }
+  else if (TREE_PUBLIC (default_decl))
+    {
+      /* In this case, each translation unit with a call to this
+	 versioned function will put out a resolver.  Ensure it
+	 is comdat to keep just one copy.  */
+      DECL_COMDAT (decl) = 1;
+      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+    }
+  /* Build result decl and add to function_decl. */
+  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
+  DECL_ARTIFICIAL (t) = 1;
+  DECL_IGNORED_P (t) = 1;
+  DECL_RESULT (decl) = t;
+
+  gimplify_function_tree (decl);
+  push_cfun (DECL_STRUCT_FUNCTION (decl));
+  gimple_register_cfg_hooks ();
+  init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl));
+  /* NOTE(review): the value OR-ed in here is overwritten by the plain
+     assignment that follows, so only the literal 15 takes effect.
+     Presumably one of the two statements is a leftover - confirm which
+     property set is intended before removing either.  */
+  cfun->curr_properties |=
+    (PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_ssa
+     | PROP_gimple_any);
+  cfun->curr_properties = 15;
+  /* The resolver body starts out as a single empty block between entry
+     and exit.  */
+  new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
+  make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
+  make_edge (new_bb, EXIT_BLOCK_PTR, 0);
+  *empty_bb = new_bb;
+
+  cgraph_add_new_function (decl, true);
+  cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
+
+  if (DECL_COMDAT_GROUP (default_decl))
+    {
+      gcc_assert (cgraph_get_node (default_decl));
+      symtab_add_to_same_comdat_group (
+	(symtab_node) cgraph_get_node (decl),
+	(symtab_node) cgraph_get_node (default_decl));
+    }
+
+  pop_cfun ();
+
+  gcc_assert (dispatch_decl != NULL);
+  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
+  DECL_ATTRIBUTES (dispatch_decl)
+    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
+
+  /* Create the alias for dispatch to resolver here.  */
+  cgraph_create_function_alias (dispatch_decl, decl);
+
+  /* The decl, its identifier and the attribute string each hold their
+     own copy of the name, so the buffer from make_name can go.  */
+  free (resolver_name);
+
+  return decl;
+}
+
+/* Generate the dispatching code body to dispatch multi-versioned function
+   DECL.  The target hook is called to process the "target" attributes and
+   provide the code to dispatch the right function at run-time.  NODE_P
+   points to the cgraph node of the dispatcher decl whose body will be
+   created; it must be flagged as a dispatcher function and have version
+   info attached.  Returns the decl of the resolver function, which is
+   created on the first call and reused afterwards.  */
+
+static tree
+ix86_generate_version_dispatcher_body (void *node_p)
+{
+  tree resolver_decl;
+  basic_block empty_bb;
+  VEC (tree, heap) *fn_ver_vec = NULL;
+  tree default_ver_decl;
+  struct cgraph_node *versn;
+  struct cgraph_node *node;
+
+  struct cgraph_function_version_info *node_version_info = NULL;
+  struct cgraph_function_version_info *versn_info = NULL;
+
+  node = (cgraph_node *)node_p;
+
+  node_version_info = get_cgraph_node_version (node);
+  gcc_assert (node->dispatcher_function
+	      && node_version_info != NULL);
+
+  /* The resolver has already been generated for this dispatcher.  */
+  if (node_version_info->dispatcher_resolver)
+    return node_version_info->dispatcher_resolver;
+
+  /* The first version in the chain corresponds to the default version.  */
+  default_ver_decl = node_version_info->next->this_node->symbol.decl;
+
+  /* node is going to be an alias, so remove the finalized bit.  */
+  node->local.finalized = 0;
+
+  resolver_decl = make_resolver_func (default_ver_decl,
+				      node->symbol.decl, &empty_bb);
+  node_version_info->dispatcher_resolver = resolver_decl;
+
+  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
+
+  fn_ver_vec = VEC_alloc (tree, heap, 2);
+
+  /* Collect every version chained behind the dispatcher, default version
+     first.  */
+  for (versn_info = node_version_info->next; versn_info;
+       versn_info = versn_info->next)
+    {
+      versn = versn_info->this_node;
+      /* Check for virtual functions here again, as by this time it should
+	 have been determined if this function needs a vtable index or
+	 not.  This happens for methods in derived classes that override
+	 virtual methods in base classes but are not explicitly marked as
+	 virtual.  */
+      if (DECL_VINDEX (versn->symbol.decl))
+        error_at (DECL_SOURCE_LOCATION (versn->symbol.decl),
+		  "Virtual function multiversioning not supported");
+      VEC_safe_push (tree, heap, fn_ver_vec, versn->symbol.decl);
+    }
+
+  dispatch_function_versions (resolver_decl, fn_ver_vec, &empty_bb);
+
+  /* The vector was only needed to hand the decls to the dispatch
+     generator; release it now.  */
+  VEC_free (tree, heap, fn_ver_vec);
+
+  rebuild_cgraph_edges ();
+  pop_cfun ();
+  return resolver_decl;
+}
+
/* This builds the processor_model struct type defined in
libgcc/config/i386/cpuinfo.c */
@@ -41005,6 +42002,9 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
+#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
+#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
+
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
@@ -41098,6 +42098,17 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin
+#undef TARGET_COMPARE_VERSION_PRIORITY
+#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
+
+#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
+#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
+ ix86_generate_version_dispatcher_body
+
+#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
+#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
+ ix86_get_function_versions_dispatcher
+
#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
@@ -41238,6 +42249,9 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print
+#undef TARGET_OPTION_FUNCTION_VERSIONS
+#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
+
#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p
===================================================================
@@ -0,0 +1,130 @@
+/* Test case to check if Multiversioning works. */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O2 -fPIC -mno-avx -mno-popcnt" } */
+
+#include <assert.h>
+
+/* Default version. */
+int foo ();
+/* The other versions of foo.  The declarations are deliberately shuffled to
+   check that the dispatcher still tests them in priority order.  */
+/* Check combination of target attributes. */
+int foo () __attribute__ ((target("arch=corei7,popcnt")));
+/* The target operands in this declaration and the definition are re-ordered.
+ This should still work. */
+int foo () __attribute__ ((target("ssse3,avx2")));
+
+/* Check for all target attributes for which dispatchers are available. */
+/* Check arch= */
+int foo () __attribute__((target("arch=core2")));
+int foo () __attribute__((target("arch=corei7")));
+int foo () __attribute__((target("arch=atom")));
+/* Check ISAs */
+int foo () __attribute__((target("avx")));
+int foo () __attribute__ ((target("arch=core2,sse4.2")));
+/* Check more arch=. */
+int foo () __attribute__((target("arch=amdfam10")));
+int foo () __attribute__((target("arch=bdver1")));
+int foo () __attribute__((target("arch=bdver2")));
+
+int (*p)() = &foo;
+int main ()
+{
+ int val = foo ();
+ assert (val == (*p)());
+
+ /* Check in the exact same order in which the dispatching
+ is expected to happen. */
+ if (__builtin_cpu_is ("bdver1"))
+ assert (val == 1);
+ else if (__builtin_cpu_is ("bdver2"))
+ assert (val == 2);
+ else if (__builtin_cpu_supports ("avx2")
+ && __builtin_cpu_supports ("ssse3"))
+ assert (val == 3);
+ else if (__builtin_cpu_supports ("avx"))
+ assert (val == 4);
+ else if (__builtin_cpu_is ("corei7")
+ && __builtin_cpu_supports ("popcnt"))
+ assert (val == 5);
+ else if (__builtin_cpu_is ("corei7"))
+ assert (val == 6);
+ else if (__builtin_cpu_is ("amdfam10h"))
+ assert (val == 7);
+ else if (__builtin_cpu_is ("core2")
+ && __builtin_cpu_supports ("sse4.2"))
+ assert (val == 8);
+ else if (__builtin_cpu_is ("core2"))
+ assert (val == 9);
+ else if (__builtin_cpu_is ("atom"))
+ assert (val == 10);
+ else
+ assert (val == 0);
+
+ return 0;
+}
+
+int foo ()
+{
+ return 0;
+}
+
+int __attribute__ ((target("arch=corei7,popcnt")))
+foo ()
+{
+ return 5;
+}
+int __attribute__ ((target("avx2,ssse3")))
+foo ()
+{
+ return 3;
+}
+
+int __attribute__ ((target("arch=core2")))
+foo ()
+{
+ return 9;
+}
+
+int __attribute__ ((target("arch=corei7")))
+foo ()
+{
+ return 6;
+}
+
+int __attribute__ ((target("arch=atom")))
+foo ()
+{
+ return 10;
+}
+
+int __attribute__ ((target("avx")))
+foo ()
+{
+ return 4;
+}
+
+int __attribute__ ((target("arch=core2,sse4.2")))
+foo ()
+{
+ return 8;
+}
+
+int __attribute__ ((target("arch=amdfam10")))
+foo ()
+{
+ return 7;
+}
+
+int __attribute__ ((target("arch=bdver1")))
+foo ()
+{
+ return 1;
+}
+
+int __attribute__ ((target("arch=bdver2")))
+foo ()
+{
+ return 2;
+}
===================================================================
@@ -0,0 +1,121 @@
+/* Test case to check if Multiversioning chooses the correct
+ dispatching order when versions are for various ISAs. */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-ifunc "" } */
+/* { dg-options "-O2 -mno-sse -mno-mmx -mno-popcnt -mno-avx" } */
+
+#include <assert.h>
+#include <stdio.h>
+
+/* Default version. */
+int foo ();
+/* The dispatch checks should be in the exact reverse order of the
+ declarations below. */
+int foo () __attribute__ ((target ("mmx")));
+int foo () __attribute__ ((target ("sse")));
+int foo () __attribute__ ((target ("sse2")));
+int foo () __attribute__ ((target ("sse3")));
+int foo () __attribute__ ((target ("ssse3")));
+int foo () __attribute__ ((target ("sse4.1")));
+int foo () __attribute__ ((target ("sse4.2")));
+int foo () __attribute__ ((target ("popcnt")));
+int foo () __attribute__ ((target ("avx")));
+int foo () __attribute__ ((target ("avx2")));
+
+int main ()
+{
+
+ int val = foo ();
+ printf ("val = %d\n", val);
+
+ if (__builtin_cpu_supports ("avx2"))
+ assert (val == 1);
+ else if (__builtin_cpu_supports ("avx"))
+ assert (val == 2);
+ else if (__builtin_cpu_supports ("popcnt"))
+ assert (val == 3);
+ else if (__builtin_cpu_supports ("sse4.2"))
+ assert (val == 4);
+ else if (__builtin_cpu_supports ("sse4.1"))
+ assert (val == 5);
+ else if (__builtin_cpu_supports ("ssse3"))
+ assert (val == 6);
+ else if (__builtin_cpu_supports ("sse3"))
+ assert (val == 7);
+ else if (__builtin_cpu_supports ("sse2"))
+ assert (val == 8);
+ else if (__builtin_cpu_supports ("sse"))
+ assert (val == 9);
+ else if (__builtin_cpu_supports ("mmx"))
+ assert (val == 10);
+ else
+ assert (val == 0);
+
+ return 0;
+}
+
+int
+foo ()
+{
+ return 0;
+}
+
+int __attribute__ ((target("mmx")))
+foo ()
+{
+ return 10;
+}
+
+int __attribute__ ((target("sse")))
+foo ()
+{
+ return 9;
+}
+
+int __attribute__ ((target("sse2")))
+foo ()
+{
+ return 8;
+}
+
+int __attribute__ ((target("sse3")))
+foo ()
+{
+ return 7;
+}
+
+int __attribute__ ((target("ssse3")))
+foo ()
+{
+ return 6;
+}
+
+int __attribute__ ((target("sse4.1")))
+foo ()
+{
+ return 5;
+}
+
+int __attribute__ ((target("sse4.2")))
+foo ()
+{
+ return 4;
+}
+
+int __attribute__ ((target("popcnt")))
+foo ()
+{
+ return 3;
+}
+
+int __attribute__ ((target("avx")))
+foo ()
+{
+ return 2;
+}
+
+int __attribute__ ((target("avx2")))
+foo ()
+{
+ return 1;
+}
===================================================================
@@ -0,0 +1,37 @@
+/* Test case to check if a call to a multiversioned function
+ is replaced with a direct call to the particular version when
+ the most specialized version's target attributes match the
+ caller.
+
+ In this program, foo is multiversioned but there is no default
+ function. This is an error if the call has to go through a
+ dispatcher. However, the call to foo in bar can be replaced
+ with a direct call to the popcnt version of foo. Hence, this
+ test should pass. */
+
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mno-sse -mno-popcnt" } */
+
+
+/* The sse version of foo; this test deliberately has no default version.  */
+int __attribute__ ((target ("sse")))
+foo ()
+{
+ return 1;
+}
+int __attribute__ ((target ("popcnt")))
+foo ()
+{
+ return 0;
+}
+
+int __attribute__ ((target ("popcnt")))
+bar ()
+{
+ return foo ();
+}
+
+int main ()
+{
+ return bar ();
+}
===================================================================
@@ -0,0 +1,23 @@
+/* Test case to check if the compiler generates an error message
+ when the default version of a multiversioned function is absent
+ and its pointer is taken. */
+
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mno-sse -mno-popcnt" } */
+
+int __attribute__ ((target ("sse")))
+foo ()
+{
+ return 1;
+}
+int __attribute__ ((target ("popcnt")))
+foo ()
+{
+ return 0;
+}
+
+int main ()
+{
+ int (*p)() = &foo; /* { dg-error "Pointer to a multiversioned function without a default is not allowed" {} } */
+ return (*p)();
+}