Patchwork User directed Function Multiversioning via Function Overloading (issue5752064)

login
register
mail settings
Submitter Sriraman Tallam
Date May 12, 2012, 2:04 a.m.
Message ID <CAAs8HmzZkx+FfZ=fcfnfG7Z7buF5+z_GN_gA4r_a_80NUuTzdg@mail.gmail.com>
Download mbox | patch
Permalink /patch/158669/
State New
Headers show

Comments

Sriraman Tallam - May 12, 2012, 2:04 a.m.
Hi H.J.,

   I have updated the patch to improve the dispatching method like we
discussed. Each feature gets a priority now, and the dispatching is
done in priority order. Please see i386.c for the changes.

Patch also available for review here:  http://codereview.appspot.com/5752064

Thanks,
-Sri.

On Thu, May 10, 2012 at 10:55 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, May 9, 2012 at 12:01 PM, Sriraman Tallam <tmsriram@google.com> wrote:
>> Hi,
>>
>> Attached new patch with more bug fixes. I will fix the dispatching
>> method to use priority of attributes in the next iteration.
>>
>> Patch also available for review here:  http://codereview.appspot.com/5752064
>>
>
> The patch looks OK to me.  Since testcase depends on the dispatching
> method,  I'd like to see the whole patch with the updated dispatching
> method.
>
> Thanks.
>
> --
> H.J.
H.J. Lu - May 12, 2012, 1:37 p.m.
On Fri, May 11, 2012 at 7:04 PM, Sriraman Tallam <tmsriram@google.com> wrote:
> Hi H.J.,
>
>   I have updated the patch to improve the dispatching method like we
> discussed. Each feature gets a priority now, and the dispatching is
> done in priority order. Please see i386.c for the changes.
>
> Patch also available for review here:  http://codereview.appspot.com/5752064
>

I think you need 3 tests:

1.  Only with ISA.
2.  Only with arch
3.  Mixed with ISA and arch

since a test mixing ISA and arch may hide issues with ISA only or arch only.

Patch

Index: gcc/doc/tm.texi
===================================================================
--- gcc/doc/tm.texi	(revision 187371)
+++ gcc/doc/tm.texi	(working copy)
@@ -10997,6 +10997,14 @@  The result is another tree containing a simplified
 call's result.  If @var{ignore} is true the value will be ignored.
 @end deftypefn
 
+@deftypefn {Target Hook} int TARGET_DISPATCH_VERSION (tree @var{dispatch_decl}, void *@var{fndecls}, basic_block *@var{empty_bb})
+For a multi-versioned function, this hook sets up the dispatcher.
+@var{dispatch_decl} is the function that will be used to dispatch the
+version. @var{fndecls} are the function choices for dispatch.
+@var{empty_bb} is a basic block in @var{dispatch_decl} where the
+code to do the dispatch will be added.
+@end deftypefn
+
 @deftypefn {Target Hook} {const char *} TARGET_INVALID_WITHIN_DOLOOP (const_rtx @var{insn})
 
 Take an instruction in @var{insn} and return NULL if it is valid within a
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in	(revision 187371)
+++ gcc/doc/tm.texi.in	(working copy)
@@ -10877,6 +10877,14 @@  The result is another tree containing a simplified
 call's result.  If @var{ignore} is true the value will be ignored.
 @end deftypefn
 
+@hook TARGET_DISPATCH_VERSION
+For a multi-versioned function, this hook sets up the dispatcher.
+@var{dispatch_decl} is the function that will be used to dispatch the
+version. @var{fndecls} are the function choices for dispatch.
+@var{empty_bb} is a basic block in @var{dispatch_decl} where the
+code to do the dispatch will be added.
+@end deftypefn
+
 @hook TARGET_INVALID_WITHIN_DOLOOP
 
 Take an instruction in @var{insn} and return NULL if it is valid within a
Index: gcc/target.def
===================================================================
--- gcc/target.def	(revision 187371)
+++ gcc/target.def	(working copy)
@@ -1249,6 +1249,15 @@  DEFHOOK
  tree, (tree fndecl, int n_args, tree *argp, bool ignore),
  hook_tree_tree_int_treep_bool_null)
 
+/* Target hook to generate the dispatching code for calls to multi-versioned
+   functions.  DISPATCH_DECL is the function that will have the dispatching
+   logic.  FNDECLS is the list of choices for dispatch and EMPTY_BB is the
+   basic block in DISPATCH_DECL which will contain the code.  */
+DEFHOOK
+(dispatch_version,
+ "",
+ int, (tree dispatch_decl, void *fndecls, basic_block *empty_bb), NULL)
+
 /* Returns a code for a target-specific builtin that implements
    reciprocal of the function, or NULL_TREE if not available.  */
 DEFHOOK
Index: gcc/tree.h
===================================================================
--- gcc/tree.h	(revision 187371)
+++ gcc/tree.h	(working copy)
@@ -3528,6 +3528,12 @@  extern VEC(tree, gc) **decl_debug_args_insert (tre
 #define DECL_FUNCTION_SPECIFIC_OPTIMIZATION(NODE) \
    (FUNCTION_DECL_CHECK (NODE)->function_decl.function_specific_optimization)
 
+/* In FUNCTION_DECL, this is set if this function has other versions generated
+   using "target" attributes.  The default version is the one which does not
+   have any "target" attribute set. */
+#define DECL_FUNCTION_VERSIONED(NODE)\
+   (FUNCTION_DECL_CHECK (NODE)->function_decl.versioned_function)
+
 /* FUNCTION_DECL inherits from DECL_NON_COMMON because of the use of the
    arguments/result/saved_tree fields by front ends.   It was either inherit
    FUNCTION_DECL from non_common, or inherit non_common from FUNCTION_DECL,
@@ -3572,8 +3578,8 @@  struct GTY(()) tree_function_decl {
   unsigned looping_const_or_pure_flag : 1;
   unsigned has_debug_args_flag : 1;
   unsigned tm_clone_flag : 1;
-
-  /* 1 bit left */
+  unsigned versioned_function : 1;
+  /* No bits left.  */
 };
 
 /* The source language of the translation-unit.  */
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h	(revision 187371)
+++ gcc/tree-pass.h	(working copy)
@@ -453,6 +453,7 @@  extern struct gimple_opt_pass pass_tm_memopt;
 extern struct gimple_opt_pass pass_tm_edges;
 extern struct gimple_opt_pass pass_split_functions;
 extern struct gimple_opt_pass pass_feedback_split_functions;
+extern struct gimple_opt_pass pass_dispatch_versions;
 
 /* IPA Passes */
 extern struct simple_ipa_opt_pass pass_ipa_lower_emutls;
Index: gcc/multiversion.c
===================================================================
--- gcc/multiversion.c	(revision 0)
+++ gcc/multiversion.c	(revision 0)
@@ -0,0 +1,833 @@ 
+/* Function Multiversioning.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   Contributed by Sriraman Tallam (tmsriram@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Holds the state for multi-versioned functions here. The front-end
+   updates the state as and when function versions are encountered.
+   This is then used to generate the dispatch code.  Also, the
+   optimization passes to clone hot paths involving versioned functions
+   will be done here.
+
+   Function versions are created by using the same function signature but
+   also tagging attribute "target" to specify the platform type for which
+   the version must be executed.  Here is an example:
+
+   int foo ()
+   {
+     printf ("Execute as default");
+     return 0;
+   }
+
+   int  __attribute__ ((target ("arch=corei7")))
+   foo ()
+   {
+     printf ("Execute for corei7");
+     return 0;
+   }
+   
+   int main ()
+   {
+     return foo ();
+   } 
+
+   The call to foo in main is replaced with a call to an IFUNC function that
+   contains the dispatch code to call the correct function version at
+   run-time.  */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "tree-inline.h"
+#include "langhooks.h"
+#include "flags.h"
+#include "cgraph.h"
+#include "diagnostic.h"
+#include "toplev.h"
+#include "timevar.h"
+#include "params.h"
+#include "fibheap.h"
+#include "intl.h"
+#include "tree-pass.h"
+#include "hashtab.h"
+#include "coverage.h"
+#include "ggc.h"
+#include "tree-flow.h"
+#include "rtl.h"
+#include "ipa-prop.h"
+#include "basic-block.h"
+#include "toplev.h"
+#include "dbgcnt.h"
+#include "tree-dump.h"
+#include "output.h"
+#include "vecprim.h"
+#include "gimple-pretty-print.h"
+#include "ipa-inline.h"
+#include "target.h"
+#include "multiversion.h"
+
+/* Generic pointer element type, used so that pointers to version_function
+   records can be kept in a heap-allocated VEC.  */
+typedef void * void_p;
+
+DEF_VEC_P (void_p);
+DEF_VEC_ALLOC_P (void_p, heap);
+
+/* Each function decl that is a function version gets an instance of this
+   structure.   Since this is called by the front-end, decl merging can
+   happen, where a decl created for a new declaration is merged with
+   the old. In this case, the new decl is deleted and the IS_DELETED
+   field is set for the struct instance corresponding to the new decl.
+   IFUNC_DECL is the decl of the ifunc function for default decls.
+   IFUNC_RESOLVER_DECL is the decl of the dispatch function.  VERSIONS
+   is a vector containing the list of function versions that are
+   the candidates for dispatch.  */
+
+typedef struct version_function_d {
+  /* The function decl this record describes; its address is the key
+     used by the decl_version_htab helpers.  */
+  tree decl;
+  /* Decl of the ifunc, or NULL until it has been created.  */
+  tree ifunc_decl;
+  /* Decl of the resolver (dispatcher), or NULL until it has been created.  */
+  tree ifunc_resolver_decl;
+  /* Records (version_function *) of the non-default versions; only
+     filled in on the record of the default decl.  */
+  VEC (void_p, heap) *versions;
+  /* True once the decl was reported as deleted by
+     mark_delete_decl_version.  */
+  bool is_deleted;
+} version_function;
+
+/* Hashmap has an entry for every function decl that has other function
+   versions.  For function decls that are the default, it also stores the
+   list of all the other function versions.  Each entry is a structure
+   of type version_function_d.  The table is created lazily by
+   create_decl_version_htab and stays NULL until then.  */
+static htab_t decl_version_htab = NULL;
+
+/* Hash callback for decl_version_htab.  P is a version_function entry;
+   it hashes on the address of the function decl it describes.  */
+
+static hashval_t
+decl_version_htab_hash_descriptor (const void *p)
+{
+  const version_function *entry = (const version_function *) p;
+  tree key = entry->decl;
+
+  return htab_hash_pointer (key);
+}
+
+/* Equality callback for decl_version_htab.  P1 is a version_function
+   entry and P2 is the decl being looked up; they match when the entry
+   describes exactly that decl.  */
+
+static int
+decl_version_htab_eq_descriptor (const void *p1, const void *p2)
+{
+  const version_function *entry = (const version_function *) p1;
+  const void_p entry_key = (const void_p) entry->decl;
+
+  return htab_eq_pointer (entry_key, p2);
+}
+
+/* Allocate decl_version_htab on first use; later calls do nothing.  */
+static void
+create_decl_version_htab (void)
+{
+  if (decl_version_htab != NULL)
+    return;
+
+  decl_version_htab = htab_create (10, decl_version_htab_hash_descriptor,
+				   decl_version_htab_eq_descriptor, NULL);
+}
+
+/* Allocate a version_function record for DECL.  Apart from the decl
+   itself every field starts out empty: no ifunc, no resolver, no
+   versions and not deleted.  */
+
+static version_function*
+new_version_function (const tree decl)
+{
+  version_function *record
+    = (version_function *) xmalloc (sizeof (version_function));
+
+  record->is_deleted = false;
+  record->versions = NULL;
+  record->ifunc_resolver_decl = NULL;
+  record->ifunc_decl = NULL;
+  record->decl = decl;
+
+  return record;
+}
+
+/* qsort comparator for an array of C strings: V1 and V2 point at the
+   array elements (char *), which are ordered with strcmp.  Used to sort
+   the arguments of the "target" attribute.  */
+
+static int
+attr_strcmp (const void *v1, const void *v2)
+{
+  char *const *lhs = (char *const *) v1;
+  char *const *rhs = (char *const *) v2;
+
+  return strcmp (*lhs, *rhs);
+}
+
+/* STR is the argument to target attribute.  This function tokenizes
+   the comma separated arguments, sorts them and returns a string which
+   is a unique identifier for the comma separated arguments: every '='
+   is replaced by '_' and the sorted arguments are joined with '_'.
+   Empty arguments (leading, trailing or repeated commas) are ignored.
+   The result is allocated with xmalloc and must be freed by the
+   caller.  */
+
+static char *
+sorted_attr_string (const char *str)
+{
+  char **args = NULL;
+  char *attr_str, *ret_str, *dest;
+  char *attr = NULL;
+  size_t len = strlen (str);
+  unsigned int argnum = 1;
+  unsigned int ntokens = 0;
+  unsigned int i;
+
+  /* Work on a private copy: it is rewritten here and later cut up by
+     strtok.  */
+  attr_str = (char *) xmalloc (len + 1);
+  memcpy (attr_str, str, len + 1);
+
+  for (i = 0; i < len; i++)
+    {
+      if (attr_str[i] == ',')
+	argnum++;
+      else if (attr_str[i] == '=')
+	attr_str[i] = '_';
+    }
+
+  if (argnum == 1)
+    return attr_str;
+
+  /* ARGNUM is an upper bound on the number of tokens.  */
+  args = (char **) xmalloc (argnum * sizeof (char *));
+
+  /* strtok skips empty fields, so fewer than ARGNUM tokens may come
+     back.  Only the NTOKENS slots actually filled in may be sorted and
+     read afterwards.  */
+  for (attr = strtok (attr_str, ",");
+       attr != NULL;
+       attr = strtok (NULL, ","))
+    args[ntokens++] = attr;
+
+  qsort (args, ntokens, sizeof (char *), attr_strcmp);
+
+  /* The tokens joined by single separators are never longer than STR,
+     so LEN + 1 bytes are enough.  */
+  ret_str = (char *) xmalloc (len + 1);
+  dest = ret_str;
+  for (i = 0; i < ntokens; i++)
+    {
+      size_t token_len = strlen (args[i]);
+
+      if (i > 0)
+	*dest++ = '_';
+      memcpy (dest, args[i], token_len);
+      dest += token_len;
+    }
+  *dest = '\0';
+
+  free (args);
+  free (attr_str);
+  return ret_str;
+}
+
+/* Compare the "target" attributes of DECL1 and DECL2.  Returns true
+   when exactly one of them carries a "target" attribute, or when both
+   do but their sorted attribute strings differ.  Returns false when
+   either decl is not a FUNCTION_DECL or when neither has the
+   attribute.  */
+
+bool
+has_different_version_attributes (const tree decl1, const tree decl2)
+{
+  tree target1, target2;
+  char *key1, *key2;
+  bool differ;
+
+  if (TREE_CODE (decl1) != FUNCTION_DECL)
+    return false;
+  if (TREE_CODE (decl2) != FUNCTION_DECL)
+    return false;
+
+  target1 = lookup_attribute ("target", DECL_ATTRIBUTES (decl1));
+  target2 = lookup_attribute ("target", DECL_ATTRIBUTES (decl2));
+
+  /* Attribute present on one side only.  */
+  if ((target1 == NULL_TREE) != (target2 == NULL_TREE))
+    return true;
+
+  /* Attribute absent on both sides.  */
+  if (target1 == NULL_TREE)
+    return false;
+
+  key1 = sorted_attr_string (
+	TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (target1))));
+  key2 = sorted_attr_string (
+	TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (target2))));
+
+  differ = (strcmp (key1, key2) != 0);
+
+  free (key1);
+  free (key2);
+
+  return differ;
+}
+
+/* If this decl corresponds to a function and has "target" attribute,
+   append the attribute string to its assembler name.  The new name has
+   the form "<assembler name>.<sorted attribute string>".  An error is
+   reported for gnu_inline and for virtual functions.  Decls without a
+   "target" attribute (the default version) keep their name.  */
+
+static void
+version_assembler_name (const tree decl)
+{
+  tree version_attr;
+  const char *orig_name, *version_string;
+  char *attr_str;
+  char *assembler_name;
+  tree assembler_name_tree;
+
+  if (TREE_CODE (decl) != FUNCTION_DECL)
+    return;
+
+  if (DECL_DECLARED_INLINE_P (decl)
+      &&lookup_attribute ("gnu_inline",
+			  DECL_ATTRIBUTES (decl)))
+    error_at (DECL_SOURCE_LOCATION (decl),
+	      "Function versions cannot be marked as gnu_inline,"
+	      " bodies have to be generated\n");
+
+  if (DECL_VIRTUAL_P (decl)
+      || DECL_VINDEX (decl))
+    error_at (DECL_SOURCE_LOCATION (decl),
+	      "Virtual function versioning not supported\n");
+
+  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
+  /* target attribute string is NULL for default functions.  */
+  if (version_attr == NULL_TREE)
+    return;
+
+  orig_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+  version_string
+    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
+
+  attr_str = sorted_attr_string (version_string);
+  /* Room for both parts, the '.' separator and the terminating NUL.  */
+  assembler_name = (char *) xmalloc (strlen (orig_name)
+				     + strlen (attr_str) + 2);
+
+  sprintf (assembler_name, "%s.%s", orig_name, attr_str);
+  if (dump_file)
+    fprintf (dump_file, "Assembler name set to %s for function version %s\n",
+	     assembler_name, IDENTIFIER_POINTER (DECL_NAME (decl)));
+
+  assembler_name_tree = get_identifier (assembler_name);
+
+  /* get_identifier keeps its own copy of the string, so both heap
+     buffers can be released here instead of being leaked.  */
+  free (assembler_name);
+  free (attr_str);
+
+  SET_DECL_ASSEMBLER_NAME (decl, assembler_name_tree);
+  /* Drop any RTL set up for DECL so it does not keep the old name.  */
+  SET_DECL_RTL (decl, NULL);
+}
+
+/* Flag DECL as a versioned function and give it its version-specific
+   assembler name.  Does nothing if DECL is already flagged.  */
+
+void
+mark_function_as_version (const tree decl)
+{
+  if (!DECL_FUNCTION_VERSIONED (decl))
+    {
+      DECL_FUNCTION_VERSIONED (decl) = 1;
+      version_assembler_name (decl);
+    }
+}
+
+/* Returns true if DECL is a function decl carrying a "target"
+   attribute, i.e. it could be a function version.  */
+
+bool
+is_target_attribute_set (const tree decl)
+{
+  if (TREE_CODE (decl) != FUNCTION_DECL)
+    return false;
+
+  return lookup_attribute ("target", DECL_ATTRIBUTES (decl)) != NULL_TREE;
+}
+
+/* Returns true if DECL is a multi-versioned function decl and is the
+   default version, that is, it has no "target" attribute.  */
+
+bool
+is_default_function (const tree decl)
+{
+  /* The versioned flag may only be queried on function decls.  */
+  if (TREE_CODE (decl) != FUNCTION_DECL)
+    return false;
+
+  if (!DECL_FUNCTION_VERSIONED (decl))
+    return false;
+
+  return lookup_attribute ("target", DECL_ATTRIBUTES (decl)) == NULL_TREE;
+}
+
+/* Look up the version_function record of function decl DECL in
+   decl_version_htab.  Returns NULL when DECL is not versioned, when the
+   table has not been created yet, or when DECL has no record.  */
+
+static version_function *
+find_function_version (const tree decl)
+{
+  if (!DECL_FUNCTION_VERSIONED (decl) || !decl_version_htab)
+    return NULL;
+
+  return (version_function *) htab_find_with_hash (decl_version_htab, decl,
+						   htab_hash_pointer (decl));
+}
+
+/* Return the version_function record of DECL, creating it and storing
+   it in the hashtable if it does not exist yet.  Returns NULL when DECL
+   is not flagged as versioned.  */
+
+static version_function *
+add_function_version (const tree decl)
+{
+  void **entry;
+
+  if (!DECL_FUNCTION_VERSIONED (decl))
+    return NULL;
+
+  create_decl_version_htab ();
+
+  entry = htab_find_slot_with_hash (decl_version_htab, (const void_p)decl,
+				    htab_hash_pointer ((const void_p)decl),
+				    INSERT);
+
+  /* An empty slot means DECL is new: fill it in.  */
+  if (*entry == NULL)
+    *entry = new_version_function (decl);
+
+  return (version_function *) *entry;
+}
+
+/* Append V to *VEC unless it is already an element.  Returns true if V
+   was appended and false if it was already present.  */
+
+static bool
+push_function_version (version_function *v, VEC (void_p, heap) **vec)
+{
+  int pos = 0;
+  void_p cur;
+
+  while (VEC_iterate (void_p, *vec, pos, cur))
+    {
+      if (cur == (void_p)v)
+	return false;
+      ++pos;
+    }
+
+  VEC_safe_push (void_p, heap, *vec, (void*)v);
+  return true;
+}
+ 
+/* Mark DECL as deleted.  The front-end calls this (duplicate_decls in
+   cp/decl.c) when a duplicate decl has been merged into the original
+   one and is about to go away.  The record of DECL, if any, is flagged
+   as deleted; if DECL is a default function its list of versions is
+   released as well.  */
+
+void
+mark_delete_decl_version (const tree decl)
+{
+  version_function *record = find_function_version (decl);
+
+  if (record == NULL)
+    return;
+
+  record->is_deleted = true;
+
+  if (!is_default_function (decl) || record->versions == NULL)
+    return;
+
+  VEC_truncate (void_p, record->versions, 0);
+  VEC_free (void_p, heap, record->versions);
+  record->versions = NULL;
+}
+
+/* Put DECL1 and DECL2 into the same group of function versions.
+   Exactly one of them must be the default version; the other one is
+   then appended to the default's list of versions.  If neither or both
+   are the default (the latter happens with duplicate declarations), or
+   if the non-default decl already has a record, nothing is done.
+   Always returns 0.  */
+
+int
+group_function_versions (const tree decl1, const tree decl2)
+{
+  bool first_is_default, second_is_default;
+  tree base_decl, variant_decl;
+  version_function *base_v, *variant_v;
+
+  gcc_assert (DECL_FUNCTION_VERSIONED (decl1)
+	      && DECL_FUNCTION_VERSIONED (decl2));
+
+  first_is_default = is_default_function (decl1);
+  second_is_default = is_default_function (decl2);
+
+  /* Versions are only attached to a default decl, and a pair of
+     defaults is just a duplicate declaration.  */
+  if (first_is_default == second_is_default)
+    return 0;
+
+  if (first_is_default)
+    {
+      base_decl = decl1;
+      variant_decl = decl2;
+    }
+  else
+    {
+      base_decl = decl2;
+      variant_decl = decl1;
+    }
+
+  gcc_assert (base_decl != variant_decl);
+  create_decl_version_htab ();
+
+  /* A version that already has a record has been grouped before.  */
+  if (find_function_version (variant_decl))
+    return 0;
+
+  base_v = add_function_version (base_decl);
+  variant_v = add_function_version (variant_decl);
+
+  if (base_v->versions == NULL)
+    base_v->versions = VEC_alloc (void_p, heap, 1);
+
+  push_function_version (variant_v, &base_v->versions);
+  return 0;
+}
+
+/* Build the attribute NAME(ARG_NAME), with ARG_NAME as its single
+   string argument, and put it in front of CHAIN.  Returns the new head
+   of the attribute list.  */
+
+static tree
+make_attribute (const char *name, const char *arg_name, tree chain)
+{
+  tree id = get_identifier (name);
+  tree arg_list = tree_cons (NULL_TREE,
+			     build_string (strlen (arg_name), arg_name),
+			     NULL_TREE);
+
+  return tree_cons (id, arg_list, chain);
+}
+
+/* Return a freshly allocated name built from the assembler name of
+   DECL and SUFFIX, joined with '.'.  If MAKE_UNIQUE is true, the name
+   obtained from get_file_function_name is inserted between the two so
+   the result can be used globally without collisions at link time.  */
+
+static char *
+make_name (tree decl, const char *suffix, bool make_unique)
+{
+  const char *base = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+  const char *file_tag;
+  char *result;
+  int size;
+
+  /* '.' is used as the separator as it is demangler friendly.  */
+  if (!make_unique)
+    {
+      size = strlen (base) + strlen (suffix) + 2;
+      result = (char *) xmalloc (size);
+      snprintf (result, size, "%s.%s", base, suffix);
+      return result;
+    }
+
+  file_tag = IDENTIFIER_POINTER (get_file_function_name ("\0"));
+  size = strlen (base) + strlen (suffix) + 2 + strlen (file_tag) + 1;
+  result = (char *) xmalloc (size);
+  snprintf (result, size, "%s.%s.%s", base, file_tag, suffix);
+  return result;
+}
+
+/* Make the resolver function decl for ifunc (IFUNC_DECL) to dispatch
+   the versions of multi-versioned function DEFAULT_DECL.  Create an
+   empty basic block in the resolver and store the pointer in
+   EMPTY_BB.  IFUNC_DECL must not be NULL; it is made non-external,
+   gets an "ifunc" attribute naming the resolver and is aliased to the
+   resolver.  Return the decl of the resolver function.  */
+
+static tree
+make_ifunc_resolver_func (const tree default_decl,
+			  const tree ifunc_decl,
+			  basic_block *empty_bb)
+{
+  char *resolver_name;
+  tree decl, type, decl_name, t;
+  basic_block new_bb;
+  tree old_current_function_decl;
+  bool make_unique = false;
+
+  /* IFUNC_DECL is written to below, so check it before first use.  */
+  gcc_assert (ifunc_decl != NULL);
+
+  /* IFUNC's have to be globally visible.  So, if the default_decl is
+     not, then the name of the IFUNC should be made unique.  */
+  if (TREE_PUBLIC (default_decl) == 0)
+    make_unique = true;
+
+  /* Append the filename to the resolver function if the versions are
+     not externally visible.  This is because the resolver function has
+     to be externally visible for the loader to find it.  So, appending
+     the filename will prevent conflicts with a resolver function from
+     another module which is based on the same version name.  */
+  resolver_name = make_name (default_decl, "resolver", make_unique);
+
+  /* The resolver function should return a (void *). */
+  type = build_function_type_list (ptr_type_node, NULL_TREE);
+
+  decl = build_fn_decl (resolver_name, type);
+  decl_name = get_identifier (resolver_name);
+  SET_DECL_ASSEMBLER_NAME (decl, decl_name);
+
+  DECL_NAME (decl) = decl_name;
+  TREE_USED (decl) = TREE_USED (default_decl);
+  DECL_ARTIFICIAL (decl) = 1;
+  DECL_IGNORED_P (decl) = 0;
+  /* IFUNC resolvers have to be externally visible.  */
+  TREE_PUBLIC (decl) = 1;
+  DECL_UNINLINABLE (decl) = 1;
+
+  DECL_EXTERNAL (decl) = DECL_EXTERNAL (default_decl);
+  DECL_EXTERNAL (ifunc_decl) = 0;
+
+  DECL_CONTEXT (decl) = NULL_TREE;
+  DECL_INITIAL (decl) = make_node (BLOCK);
+  DECL_STATIC_CONSTRUCTOR (decl) = 0;
+  TREE_READONLY (decl) = 0;
+  DECL_PURE_P (decl) = 0;
+  DECL_COMDAT (decl) = DECL_COMDAT (default_decl);
+  if (DECL_COMDAT_GROUP (default_decl))
+    {
+      make_decl_one_only (decl, DECL_COMDAT_GROUP (default_decl));
+    }
+  /* Build result decl and add to function_decl. */
+  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
+  DECL_ARTIFICIAL (t) = 1;
+  DECL_IGNORED_P (t) = 1;
+  DECL_RESULT (decl) = t;
+
+  gimplify_function_tree (decl);
+  old_current_function_decl = current_function_decl;
+  push_cfun (DECL_STRUCT_FUNCTION (decl));
+  current_function_decl = decl;
+  init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl));
+  cfun->curr_properties |=
+    (PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_referenced_vars |
+     PROP_ssa);
+  /* A single empty block between entry and exit; the caller fills it
+     with the dispatch code.  */
+  new_bb = create_empty_bb (ENTRY_BLOCK_PTR);
+  make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU);
+  make_edge (new_bb, EXIT_BLOCK_PTR, 0);
+  *empty_bb = new_bb;
+
+  cgraph_add_new_function (decl, true);
+  cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
+  cgraph_mark_force_output_node (cgraph_get_create_node (decl));
+
+  if (DECL_COMDAT_GROUP (default_decl))
+    {
+      gcc_assert (cgraph_get_node (default_decl));
+      symtab_add_to_same_comdat_group (
+	(symtab_node) cgraph_get_node (decl),
+	(symtab_node) cgraph_get_node (default_decl));
+    }
+
+  pop_cfun ();
+  current_function_decl = old_current_function_decl;
+
+  /* Mark ifunc_decl as "ifunc" with resolver as resolver_name.  */
+  DECL_ATTRIBUTES (ifunc_decl)
+    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (ifunc_decl));
+
+  /* Create the alias here.  */
+  cgraph_create_function_alias (ifunc_decl, decl);
+
+  /* get_identifier and build_string keep their own copies of the name,
+     so the heap buffer from make_name is released instead of leaked.  */
+  free (resolver_name);
+  return decl;
+}
+
+/* Make an ifunc declaration for the multi-versioned function DECL.  Calls to
+   DECL function will be replaced with calls to the ifunc.  The ifunc has
+   the same return and argument types as DECL; its name is made unique
+   when DECL is not public.  Return the decl of the ifunc created.  */
+
+static tree
+make_ifunc_func (const tree decl)
+{
+  tree ifunc_decl;
+  char *ifunc_name;
+  tree fn_type, ifunc_type;
+  bool make_unique = false;
+
+  if (TREE_PUBLIC (decl) == 0)
+    make_unique = true;
+
+  ifunc_name = make_name (decl, "ifunc", make_unique);
+
+  fn_type = TREE_TYPE (decl);
+  ifunc_type = build_function_type (TREE_TYPE (fn_type),
+				    TYPE_ARG_TYPES (fn_type));
+
+  ifunc_decl = build_fn_decl (ifunc_name, ifunc_type);
+  /* build_fn_decl interns the name as an identifier, so the heap buffer
+     from make_name is released instead of leaked.  */
+  free (ifunc_name);
+
+  TREE_USED (ifunc_decl) = 1;
+  DECL_CONTEXT (ifunc_decl) = NULL_TREE;
+  DECL_INITIAL (ifunc_decl) = error_mark_node;
+  DECL_ARTIFICIAL (ifunc_decl) = 1;
+  /* Mark this ifunc as external, the resolver will flip it again if
+     it gets generated.  */
+  DECL_EXTERNAL (ifunc_decl) = 1;
+  /* IFUNCs have to be externally visible.  */
+  TREE_PUBLIC (ifunc_decl) = 1;
+
+  return ifunc_decl;
+}
+
+/* DECL must be the default version of a multi-versioned function.
+   Return the decl of its ifunc, creating the ifunc on first use, and
+   force the default and all its live versions to be output.  If DECL
+   is not a default version an error is reported and DECL itself is
+   returned.  */
+
+tree
+get_ifunc_for_version (const tree decl)
+{
+  version_function *record;
+  struct cgraph_node *node;
+  int pos;
+  void_p cur;
+
+  /* A missing default version is not allowed.  */
+  if (!is_default_function (decl))
+    {
+      error_at (DECL_SOURCE_LOCATION (decl), "Default version not found");
+      return decl;
+    }
+
+  record = find_function_version (decl);
+  gcc_assert (record != NULL);
+
+  if (record->ifunc_decl == NULL)
+    record->ifunc_decl = make_ifunc_func (decl);
+
+  node = cgraph_get_node (decl);
+  if (node)
+    cgraph_mark_force_output_node (node);
+
+  pos = 0;
+  while (VEC_iterate (void_p, record->versions, pos, cur))
+    {
+      version_function *version = (version_function *) cur;
+
+      ++pos;
+
+      /* Versions deleted because of duplicate declarations are
+	 skipped.  */
+      if (version->is_deleted)
+	continue;
+
+      gcc_assert (version->decl != NULL);
+      node = cgraph_get_node (version->decl);
+      if (node)
+	cgraph_mark_force_output_node (node);
+    }
+
+  return record->ifunc_decl;
+}
+
+/* Generate the dispatching code to dispatch multi-versioned function
+   DECL.  Make a new function decl for dispatching and call the target
+   hook to process the "target" attributes and provide the code to
+   dispatch the right function at run-time.  DECL must be the default
+   version.  The resolver is created only once per default decl; later
+   calls return the decl recorded in the hash table entry.  */
+
+static tree
+make_ifunc_resolver_for_version (const tree decl)
+{
+  version_function *decl_v;
+  tree ifunc_resolver_decl, ifunc_decl;
+  basic_block empty_bb;
+  int ix;
+  void_p ele;
+  VEC (tree, heap) *fn_ver_vec = NULL;
+  tree old_current_function_decl;
+
+  gcc_assert (is_default_function (decl));
+
+  decl_v = find_function_version (decl);
+  gcc_assert (decl_v != NULL);
+
+  /* Already built on an earlier call.  */
+  if (decl_v->ifunc_resolver_decl != NULL)
+    return decl_v->ifunc_resolver_decl;
+
+  ifunc_decl = decl_v->ifunc_decl;
+
+  /* No call asked for the ifunc yet, so create it now.  */
+  if (ifunc_decl == NULL)
+    ifunc_decl = decl_v->ifunc_decl = make_ifunc_func (decl);
+
+  ifunc_resolver_decl = make_ifunc_resolver_func (decl, ifunc_decl,
+						  &empty_bb);
+
+  /* The hook is called with the resolver as the current function.  */
+  old_current_function_decl = current_function_decl;
+  push_cfun (DECL_STRUCT_FUNCTION (ifunc_resolver_decl));
+  current_function_decl = ifunc_resolver_decl;
+
+  /* The default version is always the first candidate.  */
+  fn_ver_vec = VEC_alloc (tree, heap, 2);
+  VEC_safe_push (tree, heap, fn_ver_vec, decl);
+
+  for (ix = 0; VEC_iterate (void_p, decl_v->versions, ix, ele); ++ix)
+    {
+      version_function *v = (version_function *) ele;
+      gcc_assert (v->decl != NULL);
+      /* Check for virtual functions here again, as by this time it should
+	 have been determined if this function needs a vtable index or
+	 not.  This happens for methods in derived classes that override
+	 virtual methods in base classes but are not explicitly marked as
+	 virtual.  */
+      if (DECL_VINDEX (v->decl))
+        error_at (DECL_SOURCE_LOCATION (v->decl),
+		  "Virtual function versioning not supported\n");
+      /* Deleted versions are not offered to the dispatcher.  */
+      if (!v->is_deleted)
+	VEC_safe_push (tree, heap, fn_ver_vec, v->decl);
+    }
+
+  gcc_assert (targetm.dispatch_version);
+  /* NOTE(review): fn_ver_vec is never freed here; confirm whether the
+     hook takes ownership of it, otherwise it leaks.  */
+  targetm.dispatch_version (ifunc_resolver_decl, fn_ver_vec, &empty_bb);
+  decl_v->ifunc_resolver_decl = ifunc_resolver_decl;
+
+  pop_cfun ();
+  current_function_decl = old_current_function_decl;
+  return ifunc_resolver_decl;
+}
+
+/* Execute function of pass_dispatch_versions.  When the current
+   function is the default version of a multi-versioned function,
+   generate its dispatching code and dump the resolver to the dump file,
+   if there is one.  Always returns 0.  */
+
+static unsigned int
+do_dispatch_versions (void)
+{
+  tree resolver;
+
+  /* A new pass for generating dispatch code for multi-versioned functions.
+     Other forms of dispatch can be added when ifunc support is not available
+     like just calling the function directly after checking for target type.
+     Currently, dispatching is done through IFUNC.  This pass will become
+     more meaningful when other dispatch mechanisms are added.  */
+
+  /* Cloning a function to produce more versions will happen here when the
+     user requests that via the target attribute. For example,
+     int foo () __attribute__ ((target(("arch=core2"), ("arch=corei7"))));
+     means that the user wants the same body of foo to be versioned for core2
+     and corei7.  In that case, this function will be cloned during this
+     pass.  */
+
+  if (!DECL_FUNCTION_VERSIONED (current_function_decl)
+      || !is_default_function (current_function_decl))
+    return 0;
+
+  resolver = make_ifunc_resolver_for_version (current_function_decl);
+  if (dump_file && resolver)
+    dump_function_to_file (resolver, dump_file, TDF_BLOCKS);
+
+  return 0;
+}
+
+/* Gate of pass_dispatch_versions: the pass is always run.  */
+static  bool
+gate_dispatch_versions (void)
+{
+  return true;
+}
+
+/* A pass to generate the dispatch code to execute the appropriate version
+   of a multi-versioned function at run-time.  It is gated by
+   gate_dispatch_versions, executes do_dispatch_versions, and both
+   requires and provides PROP_cfg.  */
+
+struct gimple_opt_pass pass_dispatch_versions =
+{
+ {
+  GIMPLE_PASS,
+  "dispatch_multiversion_functions",    /* name */
+  gate_dispatch_versions,		/* gate */
+  do_dispatch_versions,			/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  TV_MULTIVERSION_DISPATCH,		/* tv_id */
+  PROP_cfg,				/* properties_required */
+  PROP_cfg,				/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  0					/* todo_flags_finish */
+ }
+};
Index: gcc/cgraphunit.c
===================================================================
--- gcc/cgraphunit.c	(revision 187371)
+++ gcc/cgraphunit.c	(working copy)
@@ -420,6 +420,13 @@  cgraph_finalize_function (tree decl, bool nested)
       && !DECL_DISREGARD_INLINE_LIMITS (decl))
     node->symbol.force_output = 1;
 
+  /* With function versions, keep inline functions and do not worry about
+     inline limits.  */
+  if (DECL_FUNCTION_VERSIONED (decl)
+      && DECL_DECLARED_INLINE_P (decl)
+      && !DECL_EXTERNAL (decl))
+    node->symbol.force_output = 1;
+
   /* When not optimizing, also output the static functions. (see
      PR24561), but don't do so for always_inline functions, functions
      declared inline and nested functions.  These were optimized out
Index: gcc/multiversion.h
===================================================================
--- gcc/multiversion.h	(revision 0)
+++ gcc/multiversion.h	(revision 0)
@@ -0,0 +1,55 @@ 
+/* Function Multiversioning.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   Contributed by Sriraman Tallam (tmsriram@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This is the header file which provides the functions to keep track
+   of functions that are multi-versioned and to generate the dispatch
+   code to call the right version at run-time.  */
+
+#ifndef GCC_MULTIVERSION_H
+#define GCC_MULTIVERSION_H
+
+#include "tree.h"
+
+/* Mark DECL1 and DECL2 as function versions.  */
+int group_function_versions (const tree decl1, const tree decl2);
+
+/* Mark DECL as deleted and no longer a version.  */
+void mark_delete_decl_version (const tree decl);
+
+/* Returns true if DECL is the default version to be executed if all
+   other versions are inappropriate at run-time.  */
+bool is_default_function (const tree decl);
+
+/* Gets the IFUNC dispatcher for this multi-versioned function DECL.  DECL
+   must be the default function in the multi-versioned group.  */
+tree get_ifunc_for_version (const tree decl);
+
+/* Returns true when only one of DECL1 and DECL2 is marked with "target"
+   or if the "target" attribute strings of DECL1 and DECL2 don't match.  */
+bool has_different_version_attributes (const tree decl1, const tree decl2);
+
+/* Function DECL is marked to be a multi-versioned function.  If DECL is
+   not the default version, the assembler name of DECL is changed to include
+   the attribute string to keep the name unambiguous.  */
+void mark_function_as_version (const tree decl);
+
+/* Check if DECL is a FUNCTION_DECL with the target attribute set.  */
+bool is_target_attribute_set (const tree decl);
+#endif /* GCC_MULTIVERSION_H */
Index: gcc/testsuite/g++.dg/mv1.C
===================================================================
--- gcc/testsuite/g++.dg/mv1.C	(revision 0)
+++ gcc/testsuite/g++.dg/mv1.C	(revision 0)
@@ -0,0 +1,200 @@ 
+/* Test case to check if Multiversioning works.  */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-require-ifunc "" }  */
+/* { dg-options "-O2 -fPIC" } */
+
+#include <assert.h>
+
+/* Default version.  */
+int foo ();
+/* The other versions of foo.  Mix up the ordering and 
+   check if the dispatching does it in the order of priority. */
+/* Check combination of target attributes.  */
+int foo () __attribute__ ((target("arch=corei7,popcnt")));
+/* The target operands in this declaration and the definition are re-ordered.
+   This should still work.  */
+int foo () __attribute__ ((target("ssse3,avx2")));
+
+/* Check for all target attributes for which dispatchers are available.  */
+/* Check arch= */
+int foo () __attribute__((target("arch=core2")));
+int foo () __attribute__((target("arch=corei7")));
+int foo () __attribute__((target("arch=atom")));
+int foo () __attribute__((target("arch=amdfam10")));
+int foo () __attribute__((target("arch=bdver1")));
+int foo () __attribute__((target("arch=bdver2")));
+/* Check ISAs  */
+int foo () __attribute__((target("sse3")));
+int foo () __attribute__((target("sse2")));
+int foo () __attribute__((target("sse")));
+int foo () __attribute__((target("avx")));
+int foo () __attribute__((target("sse4.2")));
+int foo () __attribute__((target("popcnt")));
+int foo () __attribute__((target("sse4.1")));
+int foo () __attribute__((target("ssse3")));
+int foo () __attribute__((target("mmx")));
+int foo () __attribute__((target("avx2")));
+int (*p)() = &foo;
+int main ()
+{
+  int val = foo ();
+  assert (val ==  (*p)());
+
+  /* Check in the order in which the dispatcher is expected to test the
+     versions.  NOTE(review): 3/4 and 6/7 get equal priority; confirm order.  */
+  if (__builtin_cpu_is ("bdver1"))
+    assert (val == 1);
+  else if (__builtin_cpu_is ("bdver2"))
+    assert (val == 2);
+  else if (__builtin_cpu_supports ("avx2")
+	   && __builtin_cpu_supports ("ssse3"))
+    assert (val == 3);
+  else if (__builtin_cpu_supports ("avx2"))
+    assert (val == 4);
+  else if (__builtin_cpu_supports ("avx"))
+    assert (val == 5);
+  else if (__builtin_cpu_is ("corei7")
+	   && __builtin_cpu_supports ("popcnt"))
+    assert (val == 6);
+  else if (__builtin_cpu_supports ("popcnt"))
+    assert (val == 7);
+  else if (__builtin_cpu_is ("corei7"))
+    assert (val == 8);
+  else if (__builtin_cpu_supports ("sse4.2"))
+    assert (val == 9);
+  else if (__builtin_cpu_supports ("sse4.1"))
+    assert (val == 10);
+  else if (__builtin_cpu_is ("amdfam10h"))
+    assert (val == 11);
+  else if (__builtin_cpu_is ("core2"))
+    assert (val == 12);
+  else if (__builtin_cpu_is ("atom"))
+    assert (val == 13);
+  else if (__builtin_cpu_supports ("ssse3"))
+    assert (val == 14);
+  else if (__builtin_cpu_supports ("sse3"))
+    assert (val == 15);
+  else if (__builtin_cpu_supports ("sse2"))
+    assert (val == 16);
+  else if (__builtin_cpu_supports ("sse"))
+    assert (val == 17);
+  else if (__builtin_cpu_supports ("mmx"))
+    assert (val == 18);
+  else
+    assert (val == 0);
+  
+  return 0;
+}
+
+int foo ()
+{
+  return 0;
+}
+
+int __attribute__ ((target("arch=bdver1")))
+foo ()
+{
+  return 1;
+}
+
+int __attribute__ ((target("arch=bdver2")))
+foo ()
+{
+  return 2;
+}
+
+int __attribute__ ((target("avx2,ssse3")))
+foo ()
+{
+  return 3;
+}
+
+int __attribute__ ((target("avx2")))
+foo ()
+{
+  return 4;
+}
+
+int __attribute__ ((target("avx")))
+foo ()
+{
+  return 5;
+}
+
+int __attribute__ ((target("arch=corei7,popcnt")))
+foo ()
+{
+  return 6;
+}
+
+int __attribute__ ((target("popcnt")))
+foo ()
+{
+  return 7;
+}
+
+int __attribute__ ((target("arch=corei7")))
+foo ()
+{
+  return 8;
+}
+
+int __attribute__ ((target("sse4.2")))
+foo ()
+{
+  return 9;
+}
+
+int __attribute__ ((target("sse4.1")))
+foo ()
+{
+  return 10;
+}
+
+int __attribute__ ((target("arch=amdfam10")))
+foo ()
+{
+  return 11;
+}
+
+int __attribute__ ((target("arch=core2")))
+foo ()
+{
+  return 12;
+}
+
+int __attribute__ ((target("arch=atom")))
+foo ()
+{
+  return 13;
+}
+
+int __attribute__ ((target("ssse3")))
+foo ()
+{
+  return 14;
+}
+
+int __attribute__ ((target("sse3")))
+foo ()
+{
+  return 15;
+}
+
+int __attribute__ ((target("sse2")))
+foo ()
+{
+  return 16;
+}
+
+int __attribute__ ((target("sse")))
+foo ()
+{
+  return 17;
+}
+
+int __attribute__ ((target("mmx")))
+foo ()
+{
+  return 18;
+}
Index: gcc/cp/class.c
===================================================================
--- gcc/cp/class.c	(revision 187371)
+++ gcc/cp/class.c	(working copy)
@@ -39,6 +39,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-dump.h"
 #include "splay-tree.h"
 #include "pointer-set.h"
+#include "multiversion.h"
 
 /* The number of nested classes being processed.  If we are not in the
    scope of any class, this is zero.  */
@@ -1093,7 +1094,21 @@  add_method (tree type, tree method, tree using_dec
 	      || same_type_p (TREE_TYPE (fn_type),
 			      TREE_TYPE (method_type))))
 	{
-	  if (using_decl)
+	  /* For function versions, their parms and types match
+	     but they are not duplicates.  Record function versions
+	     as and when they are found.  */
+	  if (TREE_CODE (fn) == FUNCTION_DECL
+	      && TREE_CODE (method) == FUNCTION_DECL
+	      && (is_target_attribute_set (fn)
+		  || is_target_attribute_set (method))
+	      && has_different_version_attributes (fn, method))
+ 	    {
+	      mark_function_as_version (fn);
+	      mark_function_as_version (method);
+	      group_function_versions (fn, method);
+	      continue;
+	    }
+	  else if (using_decl)
 	    {
 	      if (DECL_CONTEXT (fn) == type)
 		/* Defer to the local function.  */
@@ -1151,6 +1166,7 @@  add_method (tree type, tree method, tree using_dec
   else
     /* Replace the current slot.  */
     VEC_replace (tree, method_vec, slot, overload);
+
   return true;
 }
 
@@ -6928,8 +6944,11 @@  resolve_address_of_overloaded_function (tree targe
 	  if (DECL_ANTICIPATED (fn))
 	    continue;
 
-	  /* See if there's a match.  */
-	  if (same_type_p (target_fn_type, static_fn_type (fn)))
+	  /* See if there's a match.   For functions that are multi-versioned
+	     match it to the default function.  */
+	  if (same_type_p (target_fn_type, static_fn_type (fn))
+	      && (!DECL_FUNCTION_VERSIONED (fn)
+		  || is_default_function (fn)))
 	    matches = tree_cons (fn, NULL_TREE, matches);
 	}
     }
@@ -7091,6 +7110,22 @@  resolve_address_of_overloaded_function (tree targe
       perform_or_defer_access_check (access_path, fn, fn);
     }
 
+  /* If a pointer to a function that is multi-versioned is requested, the
+     pointer to the dispatcher function is returned instead.  This works
+     well because indirectly calling the function will dispatch the right
+     function version at run-time. Also, the function address is kept
+     unique.  */
+  if (DECL_FUNCTION_VERSIONED (fn)
+      && is_default_function (fn))
+    {
+      tree ifunc_decl;
+      ifunc_decl = get_ifunc_for_version (fn);
+      gcc_assert (ifunc_decl != NULL);	/* Check before first use.  */
+      retrofit_lang_decl (ifunc_decl);
+      mark_used (fn);
+      return build_fold_addr_expr (ifunc_decl);
+    }
+
   if (TYPE_PTRFN_P (target_type) || TYPE_PTRMEMFUNC_P (target_type))
     return cp_build_addr_expr (fn, flags);
   else
Index: gcc/cp/decl.c
===================================================================
--- gcc/cp/decl.c	(revision 187371)
+++ gcc/cp/decl.c	(working copy)
@@ -54,6 +54,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "pointer-set.h"
 #include "splay-tree.h"
 #include "plugin.h"
+#include "multiversion.h"
 
 /* Possible cases of bad specifiers type used by bad_specifiers. */
 enum bad_spec_place {
@@ -973,6 +974,21 @@  decls_match (tree newdecl, tree olddecl)
       if (t1 != t2)
 	return 0;
 
+      /* The decls don't match if they correspond to two different versions
+	 of the same function.  */
+      if (compparms (p1, p2)
+	  && same_type_p (TREE_TYPE (f1), TREE_TYPE (f2)) 
+	  && has_different_version_attributes (newdecl, olddecl))
+	{
+	  /* One of the decls could be the default without the "target"
+	     attribute. Set it to be a versioned function here.  */
+	  mark_function_as_version (newdecl);
+	  mark_function_as_version (olddecl);
+	  /* Accumulate all the versions of a function.  */
+	  group_function_versions (olddecl, newdecl);
+	  return 0;
+	}
+
       if (CP_DECL_CONTEXT (newdecl) != CP_DECL_CONTEXT (olddecl)
 	  && ! (DECL_EXTERN_C_P (newdecl)
 		&& DECL_EXTERN_C_P (olddecl)))
@@ -1490,7 +1506,11 @@  duplicate_decls (tree newdecl, tree olddecl, bool
 	      error ("previous declaration %q+#D here", olddecl);
 	      return NULL_TREE;
 	    }
-	  else if (compparms (TYPE_ARG_TYPES (TREE_TYPE (newdecl)),
+	  /* For function versions, params and types match, but they
+	     are not ambiguous.  */
+	  else if ((!DECL_FUNCTION_VERSIONED (newdecl)
+		    && !DECL_FUNCTION_VERSIONED (olddecl))
+		   && compparms (TYPE_ARG_TYPES (TREE_TYPE (newdecl)),
 			      TYPE_ARG_TYPES (TREE_TYPE (olddecl))))
 	    {
 	      error ("new declaration %q#D", newdecl);
@@ -2262,6 +2282,16 @@  duplicate_decls (tree newdecl, tree olddecl, bool
   else if (DECL_PRESERVE_P (newdecl))
     DECL_PRESERVE_P (olddecl) = 1;
 
+  /* If the olddecl is a version, so is the newdecl.  */
+  if (TREE_CODE (newdecl) == FUNCTION_DECL
+      && DECL_FUNCTION_VERSIONED (olddecl))
+    {
+      DECL_FUNCTION_VERSIONED (newdecl) = 1;
+      /* Record that newdecl is not a valid version and has
+	 been deleted.  */
+      mark_delete_decl_version (newdecl);
+    }
+
   if (TREE_CODE (newdecl) == FUNCTION_DECL)
     {
       int function_size;
@@ -3810,6 +3840,7 @@  cp_make_fname_decl (location_t loc, tree id, int t
 			    ? NULL : fname_as_string (type_dep));
   tree type;
   tree init = cp_fname_init (name, &type);
+
   tree decl = build_decl (loc, VAR_DECL, id, type);
 
   if (name)
@@ -14036,7 +14067,11 @@  cxx_comdat_group (tree decl)
 	  else
 	    break;
 	}
-      name = DECL_ASSEMBLER_NAME (decl);
+      if (TREE_CODE (decl) == FUNCTION_DECL
+	  && DECL_FUNCTION_VERSIONED (decl))
+	name = DECL_NAME (decl);
+      else
+        name = DECL_ASSEMBLER_NAME (decl);
     }
 
   return name;
Index: gcc/cp/semantics.c
===================================================================
--- gcc/cp/semantics.c	(revision 187371)
+++ gcc/cp/semantics.c	(working copy)
@@ -3783,8 +3783,11 @@  expand_or_defer_fn_1 (tree fn)
       /* If the user wants us to keep all inline functions, then mark
 	 this function as needed so that finish_file will make sure to
 	 output it later.  Similarly, all dllexport'd functions must
-	 be emitted; there may be callers in other DLLs.  */
-      if ((flag_keep_inline_functions
+	 be emitted; there may be callers in other DLLs.
+	 Also, mark this function as needed if it is marked inline but
+	 is a multi-versioned function.  */
+      if (((flag_keep_inline_functions
+	    || DECL_FUNCTION_VERSIONED (fn))
 	   && DECL_DECLARED_INLINE_P (fn)
 	   && !DECL_REALLY_EXTERN (fn))
 	  || (flag_keep_inline_dllexport
Index: gcc/cp/decl2.c
===================================================================
--- gcc/cp/decl2.c	(revision 187371)
+++ gcc/cp/decl2.c	(working copy)
@@ -53,6 +53,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "splay-tree.h"
 #include "langhooks.h"
 #include "c-family/c-ada-spec.h"
+#include "multiversion.h"
 
 extern cpp_reader *parse_in;
 
@@ -677,9 +678,13 @@  check_classfn (tree ctype, tree function, tree tem
 	  if (is_template != (TREE_CODE (fndecl) == TEMPLATE_DECL))
 	    continue;
 
+	  /* While finding a match, same types and params are not enough
+	     if the function is versioned.  Also check version ("target")
+	     attributes.  */
 	  if (same_type_p (TREE_TYPE (TREE_TYPE (function)),
 			   TREE_TYPE (TREE_TYPE (fndecl)))
 	      && compparms (p1, p2)
+	      && !has_different_version_attributes (function, fndecl)
 	      && (!is_template
 		  || comp_template_parms (template_parms,
 					  DECL_TEMPLATE_PARMS (fndecl)))
Index: gcc/cp/call.c
===================================================================
--- gcc/cp/call.c	(revision 187371)
+++ gcc/cp/call.c	(working copy)
@@ -41,6 +41,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "langhooks.h"
 #include "c-family/c-objc.h"
 #include "timevar.h"
+#include "multiversion.h"
 
 /* The various kinds of conversion.  */
 
@@ -3903,6 +3904,16 @@  build_new_function_call (tree fn, VEC(tree,gc) **a
     {
       if (complain & tf_error)
 	{
+	  /* If the call is to a multiversioned function without
+	     a default version, overload resolution will fail.  */
+	  if (candidates
+	      && TREE_CODE (candidates->fn) == FUNCTION_DECL
+	      && DECL_FUNCTION_VERSIONED (candidates->fn))
+	    error_at (location_of (DECL_NAME (OVL_CURRENT (fn))),
+		      "Call to multiversioned function %<%D(%A)%> with"
+		      " no default version", DECL_NAME (OVL_CURRENT (fn)),
+		      build_tree_list_vec (*args));
+
 	  if (!any_viable_p && candidates && ! candidates->next
 	      && (TREE_CODE (candidates->fn) == FUNCTION_DECL))
 	    return cp_build_function_call_vec (candidates->fn, args, complain);
@@ -6824,6 +6835,19 @@  build_over_call (struct z_candidate *cand, int fla
   if (!already_used)
     mark_used (fn);
 
+  /* For a call to a multi-versioned function, the call should actually be to
+     the dispatcher.  */
+  if (DECL_FUNCTION_VERSIONED (fn)
+      && is_default_function (fn))
+    {
+      tree ifunc_decl;
+      ifunc_decl = get_ifunc_for_version (fn);
+      gcc_assert (ifunc_decl != NULL);	/* Check before first use.  */
+      retrofit_lang_decl (ifunc_decl);
+      return build_call_expr_loc_array (UNKNOWN_LOCATION, ifunc_decl,
+					nargs, argarray);
+    }
+
   if (DECL_VINDEX (fn) && (flags & LOOKUP_NONVIRTUAL) == 0)
     {
       tree t;
@@ -8081,6 +8105,60 @@  joust (struct z_candidate *cand1, struct z_candida
   size_t i;
   size_t len;
 
+  /* For Candidates of a multi-versioned function, first check if the
+     target flags of the caller match any of the candidates. If so,
+     the caller can directly call this candidate otherwise the one marked
+     default wins.  This is because the default decl is used as key to
+     aggregate all the other versions provided for it in multiversion.c.
+     When generating the actual call, the appropriate dispatcher is created
+     to call the right function version at run-time.  */
+
+  if ((TREE_CODE (cand1->fn) == FUNCTION_DECL
+       && DECL_FUNCTION_VERSIONED (cand1->fn))
+      ||(TREE_CODE (cand2->fn) == FUNCTION_DECL
+	 && DECL_FUNCTION_VERSIONED (cand2->fn)))
+    {
+      /* Both functions must be marked versioned.  */
+      gcc_assert (DECL_FUNCTION_VERSIONED (cand1->fn)
+		  && DECL_FUNCTION_VERSIONED (cand2->fn));
+
+      /* Try to see if a direct call can be made to a version.  This is
+	 possible if the caller and callee have the same target flags.
+	 If cand->fn is marked with target attributes,  check if the
+	 target approves inlining this into the caller.  If so, this is
+	 the version we want.  */
+
+      if (is_target_attribute_set (cand1->fn)
+	  && targetm.target_option.can_inline_p (current_function_decl,
+						 cand1->fn))
+	return 1;
+
+      if (is_target_attribute_set (cand2->fn)
+	  && targetm.target_option.can_inline_p (current_function_decl,
+						 cand2->fn))
+	return -1;
+
+      /* A direct call to a version is not possible, so find the default
+	 function and return it.  This will later be converted to dispatch
+	 the right version at run time.  */
+
+      if (is_default_function (cand1->fn))
+	{
+          mark_used (cand2->fn);
+	  return 1;
+	}
+
+      if (is_default_function (cand2->fn))
+	{
+          mark_used (cand1->fn);
+	  return -1;
+	}
+
+      /* If a default function is absent, this will never get resolved leading
+	 to an ambiguous call error.  */
+      return 0;
+    }
+
   /* Candidates that involve bad conversions are always worse than those
      that don't.  */
   if (cand1->viable > cand2->viable)
Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def	(revision 187371)
+++ gcc/timevar.def	(working copy)
@@ -253,6 +253,7 @@  DEFTIMEVAR (TV_TREE_IFCOMBINE        , "tree if-co
 DEFTIMEVAR (TV_TREE_UNINIT           , "uninit var analysis")
 DEFTIMEVAR (TV_PLUGIN_INIT           , "plugin initialization")
 DEFTIMEVAR (TV_PLUGIN_RUN            , "plugin execution")
+DEFTIMEVAR (TV_MULTIVERSION_DISPATCH , "multiversion dispatch")
 
 /* Everything else in rest_of_compilation not included above.  */
 DEFTIMEVAR (TV_EARLY_LOCAL	     , "early local passes")
Index: gcc/Makefile.in
===================================================================
--- gcc/Makefile.in	(revision 187371)
+++ gcc/Makefile.in	(working copy)
@@ -1297,6 +1297,7 @@  OBJS = \
 	mcf.o \
 	mode-switching.o \
 	modulo-sched.o \
+	multiversion.o \
 	omega.o \
 	omp-low.o \
 	optabs.o \
@@ -3042,6 +3043,11 @@  ree.o : ree.c $(CONFIG_H) $(SYSTEM_H) coretypes.h
    $(DF_H) $(TIMEVAR_H) tree-pass.h $(RECOG_H) $(EXPR_H) \
    $(REGS_H) $(TREE_H) $(TM_P_H) insn-config.h $(INSN_ATTR_H) $(DIAGNOSTIC_CORE_H) \
    $(TARGET_H) $(OPTABS_H) insn-codes.h rtlhooks-def.h $(PARAMS_H) $(CGRAPH_H)
+multiversion.o : multiversion.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+   $(TREE_H) langhooks.h $(TREE_INLINE_H) $(FLAGS_H) $(CGRAPH_H) intl.h \
+   $(DIAGNOSTIC_H) $(FIBHEAP_H) $(PARAMS_H) $(TIMEVAR_H) tree-pass.h \
+   $(HASHTAB_H) $(COVERAGE_H) $(GGC_H) $(TREE_FLOW_H) $(RTL_H) $(IPA_PROP_H) \
+   $(BASIC_BLOCK_H) $(TOPLEV_H) $(TREE_DUMP_H) ipa-inline.h
 cprop.o : cprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(GGC_H) \
    $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h toplev.h $(DIAGNOSTIC_CORE_H) \
Index: gcc/passes.c
===================================================================
--- gcc/passes.c	(revision 187371)
+++ gcc/passes.c	(working copy)
@@ -1293,6 +1293,7 @@  init_optimization_passes (void)
   NEXT_PASS (pass_build_cfg);
   NEXT_PASS (pass_warn_function_return);
   NEXT_PASS (pass_build_cgraph_edges);
+  NEXT_PASS (pass_dispatch_versions);
   *p = NULL;
 
   /* Interprocedural optimization passes.  */
Index: gcc/cp/mangle.c
===================================================================
--- gcc/cp/mangle.c	(revision 187371)
+++ gcc/cp/mangle.c	(working copy)
@@ -1245,7 +1245,12 @@  write_unqualified_name (const tree decl)
     {
       MANGLE_TRACE_TREE ("local-source-name", decl);
       write_char ('L');
-      write_source_name (DECL_NAME (decl));
+      if (TREE_CODE (decl) == FUNCTION_DECL
+	  && DECL_FUNCTION_VERSIONED (decl)
+	  && DECL_ASSEMBLER_NAME_SET_P (decl))
+	write_source_name (DECL_ASSEMBLER_NAME (decl));
+      else
+	write_source_name (DECL_NAME (decl));
       /* The default discriminator is 1, and that's all we ever use,
 	 so there's no code to output one here.  */
     }
@@ -1260,7 +1265,14 @@  write_unqualified_name (const tree decl)
                && LAMBDA_TYPE_P (type))
         write_closure_type_name (type);
       else
-        write_source_name (DECL_NAME (decl));
+	{
+	  if (TREE_CODE (decl) == FUNCTION_DECL
+	      && DECL_FUNCTION_VERSIONED (decl)
+	      && DECL_ASSEMBLER_NAME_SET_P (decl))
+	    write_source_name (DECL_ASSEMBLER_NAME (decl));
+	  else
+	    write_source_name (DECL_NAME (decl));
+	}
     }
 }
 
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 187371)
+++ gcc/config/i386/i386.c	(working copy)
@@ -27664,6 +27664,438 @@  ix86_init_mmx_sse_builtins (void)
     }
 }
 
+
+/* Append to basic block NEW_BB of FUNCTION_DECL code that returns the
+   address of VERSION_DECL when every predicate call in PREDICATE_CHAIN (a
+   TREE_LIST of <builtin decl, argument> pairs) yields a positive value.  With
+   a NULL chain the return is unconditional and NEW_BB itself is returned;
+   otherwise the fall-through block, to which more tests can be added.  */
+
+static basic_block
+add_condition_to_bb (tree function_decl, tree version_decl,
+		     tree predicate_chain, basic_block new_bb)
+{
+  gimple return_stmt;
+  tree convert_expr, result_var;
+  gimple convert_stmt;
+  gimple call_cond_stmt;
+  gimple if_else_stmt;
+
+  basic_block bb1, bb2, bb3;
+  edge e12, e23;
+
+  tree cond_var, and_expr_var = NULL_TREE;
+  gimple_seq gseq;
+
+  tree old_current_function_decl;
+  tree predicate_decl, predicate_arg;
+
+  old_current_function_decl = current_function_decl;
+  push_cfun (DECL_STRUCT_FUNCTION (function_decl));
+  current_function_decl = function_decl;
+
+  gcc_assert (new_bb != NULL);
+  gseq = bb_seq (new_bb);
+
+  /* Build "result_var = (void *) &VERSION_DECL; return result_var;".  */
+  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
+	     		 build_fold_addr_expr (version_decl));
+  result_var = create_tmp_var (ptr_type_node, NULL);
+  convert_stmt = gimple_build_assign (result_var, convert_expr); 
+  return_stmt = gimple_build_return (result_var);
+
+  if (predicate_chain == NULL_TREE)
+    {
+      gimple_seq_add_stmt (&gseq, convert_stmt);
+      gimple_seq_add_stmt (&gseq, return_stmt);
+      set_bb_seq (new_bb, gseq);
+      gimple_set_bb (convert_stmt, new_bb);
+      gimple_set_bb (return_stmt, new_bb);
+      pop_cfun ();
+      current_function_decl = old_current_function_decl;
+      return new_bb;
+    }
+
+  while (predicate_chain != NULL)
+    {
+      cond_var = create_tmp_var (integer_type_node, NULL);
+      predicate_decl = TREE_PURPOSE (predicate_chain);
+      predicate_arg = TREE_VALUE (predicate_chain);
+      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
+      gimple_call_set_lhs (call_cond_stmt, cond_var);
+
+      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
+      gimple_set_bb (call_cond_stmt, new_bb);
+      gimple_seq_add_stmt (&gseq, call_cond_stmt);
+
+      predicate_chain = TREE_CHAIN (predicate_chain);
+      
+      if (and_expr_var == NULL)
+        and_expr_var = cond_var;
+      else
+	{
+	  gimple assign_stmt;
+	  /* Combine with MIN_EXPR: the minimum is positive only if all are.
+	     and_expr_var = min_expr <cond_var, and_expr_var>  */
+	  assign_stmt = gimple_build_assign (and_expr_var,
+			  build2 (MIN_EXPR, integer_type_node,
+				  cond_var, and_expr_var));
+
+	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
+	  gimple_set_bb (assign_stmt, new_bb);
+	  gimple_seq_add_stmt (&gseq, assign_stmt);
+	}
+    }
+
+  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
+	  		            integer_zero_node,
+				    NULL_TREE, NULL_TREE);
+  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
+  gimple_set_bb (if_else_stmt, new_bb);
+  gimple_seq_add_stmt (&gseq, if_else_stmt);
+
+  gimple_seq_add_stmt (&gseq, convert_stmt);
+  gimple_seq_add_stmt (&gseq, return_stmt);
+  set_bb_seq (new_bb, gseq);
+
+  bb1 = new_bb;
+  e12 = split_block (bb1, if_else_stmt);
+  bb2 = e12->dest;
+  e12->flags &= ~EDGE_FALLTHRU;
+  e12->flags |= EDGE_TRUE_VALUE;
+
+  e23 = split_block (bb2, return_stmt);
+
+  gimple_set_bb (convert_stmt, bb2);
+  gimple_set_bb (return_stmt, bb2);
+
+  bb3 = e23->dest;
+  make_edge (bb1, bb3, EDGE_FALSE_VALUE); 
+
+  remove_edge (e23);
+  make_edge (bb2, EXIT_BLOCK_PTR, 0);
+
+  rebuild_cgraph_edges ();
+
+  pop_cfun ();
+  current_function_decl = old_current_function_decl;
+
+  return bb3;
+}
+
+/* Parse the "target" attribute string of DECL and return a TREE_LIST with
+   one <predicate builtin, string argument> pair per recognized argument.
+   An "arch=" argument and any number of ISA names are handled.  *PRIORITY
+   is set to the dispatch priority of DECL.  Returns NULL, after issuing an
+   error, if an argument has no dispatcher.  */
+
+static tree
+get_builtin_code_for_version (tree decl, unsigned int *priority)
+{
+  tree attrs;
+  struct cl_target_option cur_target;
+  tree target_node;
+  struct cl_target_option *new_target;
+  const char *arg_str = NULL;
+  const char *attrs_str = NULL;
+  char *tok_str = NULL;
+  char *token;
+
+  /* Priority of i386 features, greater value is higher priority.   This is
+     used to decide the order in which function dispatch must happen.  For
+     instance, a version specialized for SSE4.2 should be checked for dispatch
+     before a version for SSE3, as SSE4.2 implies SSE3.  */
+  enum feature_priority
+  {
+    P_ZERO = 0,
+    P_MMX,
+    P_SSE,
+    P_SSE2,
+    P_SSE3,
+    P_SSSE3,
+    P_PROC_SSSE3,
+    P_SSE4_a,
+    P_PROC_SSE4_a,
+    P_SSE4_1,
+    P_SSE4_2,
+    P_PROC_SSE4_2,
+    P_POPCNT,
+    P_AVX,
+    P_AVX2,
+    P_FMA,
+    P_PROC_FMA
+  };
+
+  /* These are the target attribute strings for which a dispatcher is
+     available, from fold_builtin_cpu.  */
+
+  static struct _feature_list
+    {
+      const char *const name;
+      const enum feature_priority priority;
+    }
+  const feature_list[] =
+    {
+      {"mmx", P_MMX},
+      {"sse", P_SSE},
+      {"sse2", P_SSE2},
+      {"sse3", P_SSE3},
+      {"ssse3", P_SSSE3},
+      {"sse4.1", P_SSE4_1},
+      {"sse4.2", P_SSE4_2},
+      {"popcnt", P_POPCNT},
+      {"avx", P_AVX},
+      {"avx2", P_AVX2}
+    };
+
+  static const unsigned int NUM_FEATURES
+    = sizeof (feature_list) / sizeof (struct _feature_list);
+
+  unsigned int i;
+
+  tree predicate_chain = NULL_TREE;
+  tree predicate_decl, predicate_arg;
+
+  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
+  gcc_assert (attrs != NULL);
+
+  attrs = TREE_VALUE (TREE_VALUE (attrs));
+
+  gcc_assert (TREE_CODE (attrs) == STRING_CST);
+  attrs_str = TREE_STRING_POINTER (attrs);
+
+  *priority = 0;
+
+  /* Handle arch= if specified.  For priority, set it to be 1 more than
+     the best instruction set the processor can handle.  For instance, if
+     there is a version for atom and a version for ssse3 (the highest ISA
+     priority for atom), the atom version must be checked for dispatch
+     before the ssse3 version. */
+  if (strstr (attrs_str, "arch=") != NULL)
+    {
+      cl_target_option_save (&cur_target, &global_options);
+      target_node = ix86_valid_target_attribute_tree (attrs);
+      gcc_assert (target_node);
+      new_target = TREE_TARGET_OPTION (target_node);
+      gcc_assert (new_target);
+
+      if (new_target->arch_specified && new_target->arch > 0)
+	{
+	  switch (new_target->arch)
+	    {
+	    case PROCESSOR_CORE2_32:
+	    case PROCESSOR_CORE2_64:
+	      arg_str = "core2";
+	      *priority = P_PROC_SSSE3;
+	      break;
+	    case PROCESSOR_COREI7_32:
+	    case PROCESSOR_COREI7_64:
+	      arg_str = "corei7";
+	      *priority = P_PROC_SSE4_2;
+	      break;
+	    case PROCESSOR_ATOM:
+	      arg_str = "atom";
+	      *priority = P_PROC_SSSE3;
+	      break;
+	    case PROCESSOR_AMDFAM10:
+	      arg_str = "amdfam10h";
+	      *priority = P_PROC_SSE4_a;
+	      break;
+	    case PROCESSOR_BDVER1:
+	      arg_str = "bdver1";
+	      *priority = P_PROC_FMA;
+	      break;
+	    case PROCESSOR_BDVER2:
+	      arg_str = "bdver2";
+	      *priority = P_PROC_FMA;
+	      break;
+	    }
+	}
+
+      cl_target_option_restore (&global_options, &cur_target);
+      if (arg_str == NULL)
+	{
+	  error_at (DECL_SOURCE_LOCATION (decl),
+		    "No dispatcher found for the versioning attributes");
+	  return NULL;
+	}
+
+      predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
+      /* For a C string literal the length includes the trailing NUL.  */
+      predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
+      predicate_chain = tree_cons (predicate_decl, predicate_arg,
+				   predicate_chain);
+    }
+
+  /* Process feature name.  strtok modifies its input, so use a copy.  */
+  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
+  strcpy (tok_str, attrs_str);
+  token = strtok (tok_str, ",");
+  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
+
+  while (token != NULL)
+    {
+      /* Do not process "arch="  */
+      if (strncmp (token, "arch=", 5) == 0)
+	{
+	  token = strtok (NULL, ",");
+	  continue;
+	}
+      for (i = 0; i < NUM_FEATURES; ++i)
+	{
+	  if (strcmp (token, feature_list[i].name) == 0)
+	    {
+	      predicate_arg = build_string_literal (
+				strlen (feature_list[i].name) + 1,
+				feature_list[i].name);
+	      predicate_chain = tree_cons (predicate_decl, predicate_arg,
+					   predicate_chain);
+	      /* Find the maximum priority feature.  */
+	      if (feature_list[i].priority > *priority)
+		*priority = feature_list[i].priority;
+	      break;
+	    }
+	}
+      if (i == NUM_FEATURES)
+	{
+	  error_at (DECL_SOURCE_LOCATION (decl),
+		    "No dispatcher found for %s", token);
+	  free (tok_str);
+	  return NULL;
+	}
+      token = strtok (NULL, ",");
+    }
+  free (tok_str);
+
+  if (predicate_chain == NULL_TREE)
+    {
+      error_at (DECL_SOURCE_LOCATION (decl),
+		"No dispatcher found for the versioning attributes : %s",
+		attrs_str);
+      return NULL;
+    }
+
+  predicate_chain = nreverse (predicate_chain);
+  return predicate_chain;
+}
+ 
+static int
+feature_compare (const void *v1, const void *v2)
+{
+  /* qsort callback for ix86_dispatch_version's array (same layout): sorts by
+     descending dispatch_priority; unsigned subtraction would wrap.  */
+  struct version_info
+    {
+      tree version_decl, predicate_chain;
+      unsigned int dispatch_priority;
+    };
+  unsigned int p1 = ((const struct version_info *) v1)->dispatch_priority;
+  unsigned int p2 = ((const struct version_info *) v2)->dispatch_priority;
+  return (p1 < p2) - (p1 > p2);
+}
+
+/* This is the target hook to generate the dispatch function for
+   multi-versioned functions.  DISPATCH_DECL is the function which will
+   contain the dispatch logic.  FNDECLS_P is a VEC (tree, heap) * of the
+   versions, default first.  *EMPTY_BB is the block of DISPATCH_DECL where
+   the dispatch code goes; it is updated as blocks are added.  Returns 0.  */
+
+static int
+ix86_dispatch_version (tree dispatch_decl,
+		       void *fndecls_p,
+		       basic_block *empty_bb)
+{
+  tree default_decl;
+  gimple ifunc_cpu_init_stmt;
+  gimple_seq gseq;
+  tree old_current_function_decl;
+  int ix;
+  tree ele;
+  VEC (tree, heap) *fndecls;
+  unsigned int num_versions = 0;
+  unsigned int actual_versions = 0;
+  unsigned int i;
+
+  struct _function_version_info
+    {
+      tree version_decl;
+      tree predicate_chain;
+      unsigned int dispatch_priority;
+    }*function_version_info;
+
+  gcc_assert (dispatch_decl != NULL
+	      && fndecls_p != NULL
+	      && empty_bb != NULL);
+
+  /* FNDECLS_P is actually a vector.  */
+  fndecls = (VEC (tree, heap) *)fndecls_p;
+
+  /* At least one more version other than the default.  */
+  num_versions = VEC_length (tree, fndecls);
+  gcc_assert (num_versions >= 2);
+
+  function_version_info = (struct _function_version_info *)
+    xmalloc ((num_versions - 1) * sizeof (struct _function_version_info));
+
+  /* The first version in the vector is the default decl.  */
+  default_decl = VEC_index (tree, fndecls, 0);
+
+  old_current_function_decl = current_function_decl;
+  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
+  current_function_decl = dispatch_decl;
+
+  gseq = bb_seq (*empty_bb);
+  ifunc_cpu_init_stmt = gimple_build_call_vec (
+                     ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], NULL);
+  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
+  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
+  set_bb_seq (*empty_bb, gseq);
+
+  pop_cfun ();
+  current_function_decl = old_current_function_decl;
+
+  for (ix = 1; VEC_iterate (tree, fndecls, ix, ele); ++ix)
+    {
+      tree version_decl = ele;
+      tree predicate_chain = NULL_TREE;
+      unsigned int priority;
+      /* Get attribute string, parse it and find the right predicate decl.
+         The predicate function could be a lengthy combination of many
+	 features, like arch-type and various isa-variants.  */
+      predicate_chain = get_builtin_code_for_version (version_decl, &priority);
+
+      if (predicate_chain == NULL_TREE)
+	continue;
+
+      /* Store densely: only the first ACTUAL_VERSIONS slots are used below.  */
+      function_version_info [actual_versions].version_decl = version_decl;
+      function_version_info [actual_versions].predicate_chain = predicate_chain;
+      function_version_info [actual_versions].dispatch_priority = priority;
+      actual_versions++;
+    }
+
+  /* Sort the versions according to descending order of dispatch priority.  The
+     priority is based on the ISA.  This is not a perfect solution.  There
+     could still be ambiguity.  If more than one function version is suitable
+     to execute,  which one should be dispatched?  In future, allow the user
+     to specify a dispatch  priority next to the version.  */
+  qsort (function_version_info, actual_versions,
+	 sizeof (struct _function_version_info), feature_compare);
+
+  for  (i = 0; i < actual_versions; ++i)
+    *empty_bb = add_condition_to_bb (dispatch_decl,
+				     function_version_info[i].version_decl,
+				     function_version_info[i].predicate_chain,
+				     *empty_bb);
+
+  /* dispatch default version at the end.  */
+  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
+				   NULL, *empty_bb);
+
+  free (function_version_info);
+  return 0;
+}
+
 /* This builds the processor_model struct type defined in
    libgcc/config/i386/cpuinfo.c  */
 
@@ -39539,6 +39971,9 @@  ix86_memmodel_check (unsigned HOST_WIDE_INT val)
 #undef TARGET_FOLD_BUILTIN
 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
 
+#undef TARGET_DISPATCH_VERSION
+#define TARGET_DISPATCH_VERSION ix86_dispatch_version
+
 #undef TARGET_ENUM_VA_LIST_P
 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
 
Index: gcc/cp/error.c
===================================================================
--- gcc/cp/error.c	(revision 187371)
+++ gcc/cp/error.c	(working copy)
@@ -1534,8 +1534,15 @@  dump_exception_spec (tree t, int flags)
 static void
 dump_function_name (tree t, int flags)
 {
-  tree name = DECL_NAME (t);
+  tree name;
 
+  /* For function versions, use the assembler name as the decl name is
+     the same for all versions.  */
+  if (DECL_FUNCTION_VERSIONED (t))
+    name = DECL_ASSEMBLER_NAME (t);
+  else
+    name = DECL_NAME (t);
+
   /* We can get here with a decl that was synthesized by language-
      independent machinery (e.g. coverage.c) in which case it won't
      have a lang_specific structure attached and DECL_CONSTRUCTOR_P