Patchwork 19/n: trans-mem: middle end/misc patches (LAST PATCH)

login
register
mail settings
Submitter Aldy Hernandez
Date Nov. 3, 2011, 7:32 p.m.
Message ID <4EB2EC3F.6000908@redhat.com>
Download mbox | patch
Permalink /patch/123500/
State New
Headers show

Comments

Aldy Hernandez - Nov. 3, 2011, 7:32 p.m.
This is everything else that doesn't fit neatly into any other category. 
  Here are the middle end changes, as well as pass ordering code, along 
with varasm and a potpourri of other small changes.

This is the last patch.  Please let me know if there is anything else 
(reasonable) you would like me to post.
Richard Guenther - Nov. 4, 2011, 11:14 a.m.
On Thu, Nov 3, 2011 at 8:32 PM, Aldy Hernandez <aldyh@redhat.com> wrote:
> This is everything else that doesn't fit neatly into any other category.
>  Here are the middle end changes, as well as pass ordering code, along with
> varasm and a potpourri of other small changes.
>
> This is the last patch.  Please let me know if there is anything else
> (reasonable) you would like me to post.
>
> Index: gcc/cgraph.h
> ===================================================================
> --- gcc/cgraph.h        (.../trunk)     (revision 180744)
> +++ gcc/cgraph.h        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -98,6 +98,9 @@ struct GTY(()) cgraph_local_info {
>   /* True when the function has been originally extern inline, but it is
>      redefined now.  */
>   unsigned redefined_extern_inline : 1;
> +
> +  /* True if the function may enter serial irrevocable mode.  */
> +  unsigned tm_may_enter_irr : 1;
>  };
>
>  /* Information about the function that needs to be computed globally
> @@ -565,6 +568,8 @@ void verify_cgraph_node (struct cgraph_n
>  void cgraph_build_static_cdtor (char which, tree body, int priority);
>  void cgraph_reset_static_var_maps (void);
>  void init_cgraph (void);
> +struct cgraph_node * cgraph_copy_node_for_versioning (struct cgraph_node *,
> +               tree, VEC(cgraph_edge_p,heap)*, bitmap);
>  struct cgraph_node *cgraph_function_versioning (struct cgraph_node *,
>                                                VEC(cgraph_edge_p,heap)*,
>                                                VEC(ipa_replace_map_p,gc)*,
> Index: gcc/tree-pass.h
> ===================================================================
> --- gcc/tree-pass.h     (.../trunk)     (revision 180744)
> +++ gcc/tree-pass.h     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -447,6 +447,12 @@ extern struct gimple_opt_pass pass_build
>  extern struct gimple_opt_pass pass_local_pure_const;
>  extern struct gimple_opt_pass pass_tracer;
>  extern struct gimple_opt_pass pass_warn_unused_result;
> +extern struct gimple_opt_pass pass_diagnose_tm_blocks;
> +extern struct gimple_opt_pass pass_lower_tm;
> +extern struct gimple_opt_pass pass_tm_init;
> +extern struct gimple_opt_pass pass_tm_mark;
> +extern struct gimple_opt_pass pass_tm_memopt;
> +extern struct gimple_opt_pass pass_tm_edges;
>  extern struct gimple_opt_pass pass_split_functions;
>  extern struct gimple_opt_pass pass_feedback_split_functions;
>
> @@ -469,6 +475,7 @@ extern struct ipa_opt_pass_d pass_ipa_pu
>  extern struct simple_ipa_opt_pass pass_ipa_pta;
>  extern struct ipa_opt_pass_d pass_ipa_lto_wpa_fixup;
>  extern struct ipa_opt_pass_d pass_ipa_lto_finish_out;
> +extern struct simple_ipa_opt_pass pass_ipa_tm;
>  extern struct ipa_opt_pass_d pass_ipa_profile;
>  extern struct ipa_opt_pass_d pass_ipa_cdtor_merge;
>
> Index: gcc/rtlanal.c
> ===================================================================
> --- gcc/rtlanal.c       (.../trunk)     (revision 180744)
> +++ gcc/rtlanal.c       (.../branches/transactional-memory)     (revision
> 180773)
> @@ -1918,6 +1918,7 @@ alloc_reg_note (enum reg_note kind, rtx
>     case REG_CC_USER:
>     case REG_LABEL_TARGET:
>     case REG_LABEL_OPERAND:
> +    case REG_TM:
>       /* These types of register notes use an INSN_LIST rather than an
>         EXPR_LIST, so that copying is done right and dumps look
>         better.  */
> Index: gcc/omp-low.c
> ===================================================================
> --- gcc/omp-low.c       (.../trunk)     (revision 180744)
> +++ gcc/omp-low.c       (.../branches/transactional-memory)     (revision
> 180773)
> @@ -139,6 +139,7 @@ static tree scan_omp_1_op (tree *, int *
>     case GIMPLE_TRY: \
>     case GIMPLE_CATCH: \
>     case GIMPLE_EH_FILTER: \
> +    case GIMPLE_TRANSACTION: \
>       /* The sub-statements for these should be walked.  */ \
>       *handled_ops_p = false; \
>       break;
> Index: gcc/toplev.c
> ===================================================================
> --- gcc/toplev.c        (.../trunk)     (revision 180744)
> +++ gcc/toplev.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -599,6 +599,7 @@ compile_file (void)
>
>       output_shared_constant_pool ();
>       output_object_blocks ();
> +  finish_tm_clone_pairs ();
>       /* Write out any pending weak symbol declarations.  */
>       weak_finish ();
> Index: gcc/cgraphunit.c
> ===================================================================
> --- gcc/cgraphunit.c    (.../trunk)     (revision 180744)
> +++ gcc/cgraphunit.c    (.../branches/transactional-memory)     (revision
> 180773)
> @@ -2272,7 +2272,7 @@ update_call_expr (struct cgraph_node *ne
>    was copied to prevent duplications of calls that are dead
>    in the clone.  */
>
> -static struct cgraph_node *
> +struct cgraph_node *
>  cgraph_copy_node_for_versioning (struct cgraph_node *old_version,
>                                 tree new_decl,
>                                 VEC(cgraph_edge_p,heap) *redirect_callers,
> @@ -2286,7 +2286,7 @@ cgraph_copy_node_for_versioning (struct
>
>    new_version = cgraph_create_node (new_decl);
>
> -   new_version->analyzed = true;
> +   new_version->analyzed = old_version->analyzed;

Hm?  analyzed means "with body"; surely you have a body if you clone.

>    new_version->local = old_version->local;
>    new_version->local.externally_visible = false;
>    new_version->local.local = true;
> @@ -2294,6 +2294,7 @@ cgraph_copy_node_for_versioning (struct
>    new_version->rtl = old_version->rtl;
>    new_version->reachable = true;
>    new_version->count = old_version->count;
> +   new_version->lowered = true;

OTOH this isn't necessarily true.  cgraph exists before lowering.

>    for (e = old_version->callees; e; e=e->next_callee)
>      if (!bbs_to_copy
> @@ -2389,7 +2390,6 @@ cgraph_function_versioning (struct cgrap
>   DECL_VIRTUAL_P (new_version_node->decl) = 0;
>   new_version_node->local.externally_visible = 0;
>   new_version_node->local.local = 1;
> -  new_version_node->lowered = true;
>
>   /* Update the call_expr on the edges to call the new version node. */
>   update_call_expr (new_version_node);
> Index: gcc/tree-ssa-alias.c
> ===================================================================
> --- gcc/tree-ssa-alias.c        (.../trunk)     (revision 180744)
> +++ gcc/tree-ssa-alias.c        (.../branches/transactional-memory)
> (revision 180773)
> @@ -1182,6 +1182,8 @@ ref_maybe_used_by_call_p_1 (gimple call,
>        case BUILT_IN_MEMPCPY:
>        case BUILT_IN_STPCPY:
>        case BUILT_IN_STPNCPY:
> +        case BUILT_IN_TM_MEMCPY:
> +        case BUILT_IN_TM_MEMMOVE:
>          {
>            ao_ref dref;
>            tree size = NULL_TREE;
> @@ -1228,6 +1230,32 @@ ref_maybe_used_by_call_p_1 (gimple call,
>                                           size);
>            return refs_may_alias_p_1 (&dref, ref, false);
>          }
> +
> +        /* The following functions read memory pointed to by their
> +          first argument.  */
> +       CASE_BUILT_IN_TM_LOAD (1):
> +       CASE_BUILT_IN_TM_LOAD (2):
> +       CASE_BUILT_IN_TM_LOAD (4):
> +       CASE_BUILT_IN_TM_LOAD (8):
> +        CASE_BUILT_IN_TM_LOAD (FLOAT):
> +       CASE_BUILT_IN_TM_LOAD (DOUBLE):
> +       CASE_BUILT_IN_TM_LOAD (LDOUBLE):
> +       CASE_BUILT_IN_TM_LOAD (M64):
> +       CASE_BUILT_IN_TM_LOAD (M128):
> +       CASE_BUILT_IN_TM_LOAD (M256):
> +        case BUILT_IN_TM_LOG:
> +        case BUILT_IN_TM_LOG_1:
> +        case BUILT_IN_TM_LOG_2:
> +        case BUILT_IN_TM_LOG_4:
> +        case BUILT_IN_TM_LOG_8:
> +        case BUILT_IN_TM_LOG_FLOAT:
> +        case BUILT_IN_TM_LOG_DOUBLE:
> +        case BUILT_IN_TM_LOG_LDOUBLE:
> +        case BUILT_IN_TM_LOG_M64:
> +        case BUILT_IN_TM_LOG_M128:
> +        case BUILT_IN_TM_LOG_M256:
> +         return ptr_deref_may_alias_ref_p_1 (gimple_call_arg (call, 0),
> ref);
> +
>        /* These read memory pointed to by the first argument.  */
>        case BUILT_IN_STRDUP:
>        case BUILT_IN_STRNDUP:
> @@ -1250,6 +1278,7 @@ ref_maybe_used_by_call_p_1 (gimple call,
>        case BUILT_IN_STACK_SAVE:
>        case BUILT_IN_STACK_RESTORE:
>        case BUILT_IN_MEMSET:
> +        case BUILT_IN_TM_MEMSET:
>        case BUILT_IN_MEMSET_CHK:
>        case BUILT_IN_FREXP:
>        case BUILT_IN_FREXPF:
> @@ -1480,6 +1509,19 @@ call_may_clobber_ref_p_1 (gimple call, a
>        case BUILT_IN_STRCAT:
>        case BUILT_IN_STRNCAT:
>        case BUILT_IN_MEMSET:
> +        case BUILT_IN_TM_MEMSET:
> +        CASE_BUILT_IN_TM_STORE (1):
> +        CASE_BUILT_IN_TM_STORE (2):
> +        CASE_BUILT_IN_TM_STORE (4):
> +        CASE_BUILT_IN_TM_STORE (8):
> +        CASE_BUILT_IN_TM_STORE (FLOAT):
> +        CASE_BUILT_IN_TM_STORE (DOUBLE):
> +        CASE_BUILT_IN_TM_STORE (LDOUBLE):
> +        CASE_BUILT_IN_TM_STORE (M64):
> +        CASE_BUILT_IN_TM_STORE (M128):
> +        CASE_BUILT_IN_TM_STORE (M256):
> +        case BUILT_IN_TM_MEMCPY:
> +        case BUILT_IN_TM_MEMMOVE:
>          {
>            ao_ref dref;
>            tree size = NULL_TREE;
> Index: gcc/ipa-inline.c
> ===================================================================
> --- gcc/ipa-inline.c    (.../trunk)     (revision 180744)
> +++ gcc/ipa-inline.c    (.../branches/transactional-memory)     (revision
> 180773)
> @@ -284,6 +284,15 @@ can_inline_edge_p (struct cgraph_edge *e
>       e->inline_failed = CIF_EH_PERSONALITY;
>       inlinable = false;
>     }
> +  /* TM pure functions should not get inlined if the outer function is
> +     a TM safe function.  */
> +  else if (flag_tm

Please move flag checks into the respective predicates.  Any reason
why the is_tm_pure () predicate wouldn't already do the correct thing
with !flag_tm?

> +          && is_tm_pure (callee->decl)
> +          && is_tm_safe (e->caller->decl))
> +    {
> +      e->inline_failed = CIF_UNSPECIFIED;
> +      inlinable = false;
> +    }
>   /* Don't inline if the callee can throw non-call exceptions but the
>      caller cannot.
>      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is
> missing.
> Index: gcc/crtstuff.c
> ===================================================================
> --- gcc/crtstuff.c      (.../trunk)     (revision 180744)
> +++ gcc/crtstuff.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -162,6 +162,9 @@ extern void __do_global_ctors_1 (void);
>  /* Likewise for _Jv_RegisterClasses.  */
>  extern void _Jv_RegisterClasses (void *) TARGET_ATTRIBUTE_WEAK;
>
> +extern void _ITM_registerTMCloneTable (void *, size_t)
> TARGET_ATTRIBUTE_WEAK;
> +extern void _ITM_deregisterTMCloneTable (void *) TARGET_ATTRIBUTE_WEAK;
> +
>  #ifdef OBJECT_FORMAT_ELF
>
>  /*  Declare a pointer to void function type.  */
> @@ -241,6 +244,11 @@ STATIC void *__JCR_LIST__[]
>   = { };
>  #endif /* JCR_SECTION_NAME */
>
> +STATIC func_ptr __TMC_LIST__[]
> +  __attribute__((unused, section(".tm_clone_table"),
> aligned(sizeof(void*))))
> +  = { };
> +extern func_ptr __TMC_END__[] __attribute__((__visibility__ ("hidden")));
> +
>  #if defined(INIT_SECTION_ASM_OP) || defined(INIT_ARRAY_SECTION_ASM_OP)
>
>  #ifdef OBJECT_FORMAT_ELF
> @@ -330,6 +338,13 @@ __do_global_dtors_aux (void)
>   }
>  #endif /* !defined(FINI_ARRAY_SECTION_ASM_OP) */
>
> +  if (_ITM_deregisterTMCloneTable)
> +    {
> +      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> +      if (size > 0)
> +       _ITM_deregisterTMCloneTable (__TMC_LIST__);
> +    }
> +
>  #ifdef USE_EH_FRAME_REGISTRY
>  #ifdef CRT_GET_RFIB_DATA
>   /* If we used the new __register_frame_info_bases interface,
> @@ -391,6 +406,12 @@ frame_dummy (void)
>        register_classes (__JCR_LIST__);
>     }
>  #endif /* JCR_SECTION_NAME */
> +  if (_ITM_registerTMCloneTable)
> +    {
> +      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> +      if (size > 0)
> +       _ITM_registerTMCloneTable (__TMC_LIST__, size);
> +    }
>  }
>
>  #ifdef INIT_SECTION_ASM_OP
> @@ -457,6 +478,13 @@ __do_global_dtors (void)
>   for (p = __DTOR_LIST__ + 1; (f = *p); p++)
>     f ();
>
> +  if (_ITM_deregisterTMCloneTable)
> +    {
> +      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> +      if (size > 0)
> +       _ITM_deregisterTMCloneTable (__TMC_LIST__);
> +    }
> +
>  #ifdef USE_EH_FRAME_REGISTRY
>   if (__deregister_frame_info)
>     __deregister_frame_info (__EH_FRAME_BEGIN__);
> @@ -570,6 +598,11 @@ STATIC void *__JCR_END__[1]
>    = { 0 };
>  #endif /* JCR_SECTION_NAME */
>
> +func_ptr __TMC_END__[]
> +  __attribute__((unused, section(".tm_clone_table"), aligned(sizeof(void
> *)),
> +                __visibility__ ("hidden")))
> +  = { };
> +
>  #ifdef INIT_ARRAY_SECTION_ASM_OP
>
>  /* If we are using .init_array, there is nothing to do.  */
> Index: gcc/cfgbuild.c
> ===================================================================
> --- gcc/cfgbuild.c      (.../trunk)     (revision 180744)
> +++ gcc/cfgbuild.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -338,18 +338,30 @@ make_edges (basic_block min, basic_block
>          /* Add any appropriate EH edges.  */
>          rtl_make_eh_edge (edge_cache, bb, insn);
>
> -         if (code == CALL_INSN && nonlocal_goto_handler_labels)
> +         if (code == CALL_INSN)
>            {
> -             /* ??? This could be made smarter: in some cases it's possible
> -                to tell that certain calls will not do a nonlocal goto.
> -                For example, if the nested functions that do the nonlocal
> -                gotos do not have their addresses taken, then only calls to
> -                those functions or to other nested functions that use them
> -                could possibly do nonlocal gotos.  */
>              if (can_nonlocal_goto (insn))
> -               for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
> -                 make_label_edge (edge_cache, bb, XEXP (x, 0),
> -                                  EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> +               {
> +                 /* ??? This could be made smarter: in some cases it's
> +                    possible to tell that certain calls will not do a
> +                    nonlocal goto.  For example, if the nested functions
> +                    that do the nonlocal gotos do not have their addresses
> +                    taken, then only calls to those functions or to other
> +                    nested functions that use them could possibly do
> +                    nonlocal gotos.  */
> +                 for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
> +                   make_label_edge (edge_cache, bb, XEXP (x, 0),
> +                                    EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> +               }
> +
> +             if (flag_tm)
> +               {
> +                 rtx note;
> +                 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
> +                   if (REG_NOTE_KIND (note) == REG_TM)
> +                     make_label_edge (edge_cache, bb, XEXP (note, 0),
> +                                      EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> +               }
>            }
>        }
>
> Index: gcc/timevar.def
> ===================================================================
> --- gcc/timevar.def     (.../trunk)     (revision 180744)
> +++ gcc/timevar.def     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -184,6 +184,7 @@ DEFTIMEVAR (TV_TREE_COPY_RENAME          , "
>  DEFTIMEVAR (TV_TREE_SSA_VERIFY       , "tree SSA verifier")
>  DEFTIMEVAR (TV_TREE_STMT_VERIFY      , "tree STMT verifier")
>  DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch initialization
> conversion")
> +DEFTIMEVAR (TV_TRANS_MEM             , "transactional memory")
>  DEFTIMEVAR (TV_TREE_STRLEN           , "tree strlen optimization")
>  DEFTIMEVAR (TV_CGRAPH_VERIFY         , "callgraph verifier")
>  DEFTIMEVAR (TV_DOM_FRONTIERS         , "dominance frontiers")
> Index: gcc/recog.c
> ===================================================================
> --- gcc/recog.c (.../trunk)     (revision 180744)
> +++ gcc/recog.c (.../branches/transactional-memory)     (revision 180773)
> @@ -3287,6 +3287,7 @@ peep2_attempt (basic_block bb, rtx insn,
>          {
>          case REG_NORETURN:
>          case REG_SETJMP:
> +         case REG_TM:
>            add_reg_note (new_insn, REG_NOTE_KIND (note),
>                          XEXP (note, 0));
>            break;
> Index: gcc/function.h
> ===================================================================
> --- gcc/function.h      (.../trunk)     (revision 180744)
> +++ gcc/function.h      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -467,6 +467,14 @@ extern GTY(()) struct rtl_data x_rtl;
>    want to do differently.  */
>  #define crtl (&x_rtl)
>
> +/* This structure is used to map a gimple statement to a label,
> +   or list of labels to represent transaction restart.  */
> +
> +struct GTY(()) tm_restart_node {
> +  gimple stmt;
> +  tree label_or_list;
> +};
> +
>  struct GTY(()) stack_usage
>  {
>   /* # of bytes of static stack space allocated by the function.  */
> @@ -518,6 +526,10 @@ struct GTY(()) function {
>   /* Value histograms attached to particular statements.  */
>   htab_t GTY((skip)) value_histograms;
>
> +  /* Map gimple stmt to tree label (or list of labels) for transaction
> +     restart and abort.  */
> +  htab_t GTY ((param_is (struct tm_restart_node))) tm_restart;
> +

As this maps 'gimple' to tree, shouldn't this go to fn->gimple_df instead?
That way you avoid growing generic struct function.  Or into eh_status,
if that looks like a better fit.

>   /* For function.c.  */
>
>   /* Points to the FUNCTION_DECL of this function.  */
> Index: gcc/emit-rtl.c
> ===================================================================
> --- gcc/emit-rtl.c      (.../trunk)     (revision 180744)
> +++ gcc/emit-rtl.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -3595,6 +3595,7 @@ try_split (rtx pat, rtx trial, int last)
>
>        case REG_NORETURN:
>        case REG_SETJMP:
> +       case REG_TM:
>          for (insn = insn_last; insn != NULL_RTX; insn = PREV_INSN (insn))
>            {
>              if (CALL_P (insn))
> Index: gcc/cfgexpand.c
> ===================================================================
> --- gcc/cfgexpand.c     (.../trunk)     (revision 180744)
> +++ gcc/cfgexpand.c     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -2096,6 +2096,32 @@ expand_gimple_stmt (gimple stmt)
>        }
>     }
>
> +  /* Mark all calls that can have a transaction restart.  */

Why isn't this done when we expand the call?  This walking of the
RTL sequence looks like a hack (an easy one, albeit).

> +  if (cfun->tm_restart && is_gimple_call (stmt))
> +    {
> +      struct tm_restart_node dummy;
> +      void **slot;
> +
> +      dummy.stmt = stmt;
> +      slot = htab_find_slot (cfun->tm_restart, &dummy, NO_INSERT);
> +      if (slot)
> +       {
> +         struct tm_restart_node *n = (struct tm_restart_node *) *slot;
> +         tree list = n->label_or_list;
> +         rtx insn;
> +
> +         for (insn = next_real_insn (last); !CALL_P (insn);
> +              insn = next_real_insn (insn))
> +           continue;
> +
> +         if (TREE_CODE (list) == LABEL_DECL)
> +           add_reg_note (insn, REG_TM, label_rtx (list));
> +         else
> +           for (; list ; list = TREE_CHAIN (list))
> +             add_reg_note (insn, REG_TM, label_rtx (TREE_VALUE (list)));
> +       }
> +    }
> +
>   return last;
>  }
>
> @@ -4455,6 +4481,10 @@ gimple_expand_cfg (void)
>   /* After expanding, the return labels are no longer needed. */
>   return_label = NULL;
>   naked_return_label = NULL;
> +
> +  /* After expanding, the tm_restart map is no longer needed.  */
> +  cfun->tm_restart = NULL;

You should still free it, so as not to confuse the statistics code, I think.

> +
>   /* Tag the blocks with a depth number so that change_scope can find
>      the common parent easily.  */
>   set_block_levels (DECL_INITIAL (cfun->decl), 0);
> Index: gcc/varasm.c
> ===================================================================
> --- gcc/varasm.c        (.../trunk)     (revision 180744)
> +++ gcc/varasm.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -5859,6 +5859,103 @@ assemble_alias (tree decl, tree target)
>     }
>  }
>
> +/* Record and output a table of translations from original function
> +   to its transaction aware clone.  Note that tm_pure functions are
> +   considered to be their own clone.  */
> +
> +static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
> +     htab_t tm_clone_pairs;
> +
> +void
> +record_tm_clone_pair (tree o, tree n)
> +{
> +  struct tree_map **slot, *h;
> +
> +  if (tm_clone_pairs == NULL)
> +    tm_clone_pairs = htab_create_ggc (32, tree_map_hash, tree_map_eq, 0);
> +
> +  h = ggc_alloc_tree_map ();
> +  h->hash = htab_hash_pointer (o);
> +  h->base.from = o;
> +  h->to = n;
> +
> +  slot = (struct tree_map **)
> +    htab_find_slot_with_hash (tm_clone_pairs, h, h->hash, INSERT);
> +  *slot = h;
> +}
> +
> +tree
> +get_tm_clone_pair (tree o)
> +{
> +  if (tm_clone_pairs)
> +    {
> +      struct tree_map *h, in;
> +
> +      in.base.from = o;
> +      in.hash = htab_hash_pointer (o);
> +      h = (struct tree_map *) htab_find_with_hash (tm_clone_pairs,
> +                                                  &in, in.hash);
> +      if (h)
> +       return h->to;
> +    }
> +  return NULL_TREE;
> +}
> +
> +/* Helper function for finish_tm_clone_pairs.  Dump the clone table.  */
> +
> +int
> +finish_tm_clone_pairs_1 (void **slot, void *info ATTRIBUTE_UNUSED)
> +{
> +  struct tree_map *map = (struct tree_map *) *slot;
> +  bool *switched = (bool *) info;
> +  tree src = map->base.from;
> +  tree dst = map->to;
> +  struct cgraph_node *src_n = cgraph_get_node (src);
> +  struct cgraph_node *dst_n = cgraph_get_node (dst);
> +
> +  /* The function ipa_tm_create_version() marks the clone as needed if
> +     the original function was needed.  But we also mark the clone as
> +     needed if we ever called the clone indirectly through
> +     TM_GETTMCLONE.  If neither of these are true, we didn't generate
> +     a clone, and we didn't call it indirectly... no sense keeping it
> +     in the clone table.  */
> +  if (!dst_n || !dst_n->needed)
> +    return 1;
> +
> +  /* This covers the case where we have optimized the original
> +     function away, and only access the transactional clone.  */
> +  if (!src_n || !src_n->needed)
> +    return 1;
> +
> +  if (!*switched)
> +    {
> +      switch_to_section (get_named_section (NULL, ".tm_clone_table", 3));
> +      assemble_align (POINTER_SIZE);
> +      *switched = true;
> +    }
> +
> +  assemble_integer (XEXP (DECL_RTL (src), 0),
> +                   POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
> +  assemble_integer (XEXP (DECL_RTL (dst), 0),
> +                   POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
> +  return 1;
> +}
> +
> +void
> +finish_tm_clone_pairs (void)
> +{
> +  bool switched = false;
> +
> +  if (tm_clone_pairs == NULL)
> +    return;
> +
> +  htab_traverse_noresize (tm_clone_pairs, finish_tm_clone_pairs_1,
> +                         (void *) &switched);

This makes the generated table dependent on memory layout.  You
need to walk the pairs in some deterministic order.  In fact, why not
walk all cgraph_nodes looking for the pairs - they should still be
in the list of clones for a node and you've marked them with DECL_TM_CLONE.
You can then sort them by cgraph node uid.

Did you check bootstrapping GCC with TM enabled and address-space
randomization turned on?

> +  htab_delete (tm_clone_pairs);
> +  tm_clone_pairs = NULL;
> +}
> +
> +
>  /* Emit an assembler directive to set symbol for DECL visibility to
>    the visibility type VIS, which must not be VISIBILITY_DEFAULT.  */
>
> Index: gcc/output.h
> ===================================================================
> --- gcc/output.h        (.../trunk)     (revision 180744)
> +++ gcc/output.h        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -606,6 +606,11 @@ extern bool unlikely_text_section_p (sec
>  extern void switch_to_section (section *);
>  extern void output_section_asm_op (const void *);
>
> +extern void record_tm_clone_pair (tree, tree);
> +extern void finish_tm_clone_pairs (void);
> +extern int finish_tm_clone_pairs_1 (void **, void *);
> +extern tree get_tm_clone_pair (tree);
> +
>  extern void default_asm_output_source_filename (FILE *, const char *);
>  extern void output_file_directive (FILE *, const char *);
>
> Index: gcc/combine.c
> ===================================================================
> --- gcc/combine.c       (.../trunk)     (revision 180744)
> +++ gcc/combine.c       (.../branches/transactional-memory)     (revision
> 180773)
> @@ -13286,6 +13286,7 @@ distribute_notes (rtx notes, rtx from_in
>
>        case REG_NORETURN:
>        case REG_SETJMP:
> +       case REG_TM:
>          /* These notes must remain with the call.  It should not be
>             possible for both I2 and I3 to be a call.  */
>          if (CALL_P (i3))
> Index: gcc/tree-flow.h
> ===================================================================
> --- gcc/tree-flow.h     (.../trunk)     (revision 180744)
> +++ gcc/tree-flow.h     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -778,6 +778,9 @@ extern bool maybe_duplicate_eh_stmt (gim
>  extern bool verify_eh_edges (gimple);
>  extern bool verify_eh_dispatch_edge (gimple);
>
> +/* In gtm-low.c  */
> +extern bool is_transactional_stmt (const_gimple);
> +

gimple.h please.  Looks like a gimple predicate as well, so the implementation
should be in gimple.c?

>  /* In tree-ssa-pre.c  */
>  struct pre_expr_d;
>  void add_to_value (unsigned int, struct pre_expr_d *);
> Index: gcc/tree-ssa-structalias.c
> ===================================================================
> --- gcc/tree-ssa-structalias.c  (.../trunk)     (revision 180744)
> +++ gcc/tree-ssa-structalias.c  (.../branches/transactional-memory)
> (revision 180773)
> @@ -4024,6 +4024,8 @@ find_func_aliases_for_builtin_call (gimp
>       case BUILT_IN_STPCPY_CHK:
>       case BUILT_IN_STRCAT_CHK:
>       case BUILT_IN_STRNCAT_CHK:
> +      case BUILT_IN_TM_MEMCPY:
> +      case BUILT_IN_TM_MEMMOVE:
>        {
>          tree res = gimple_call_lhs (t);
>          tree dest = gimple_call_arg (t, (DECL_FUNCTION_CODE (fndecl)
> @@ -4056,6 +4058,7 @@ find_func_aliases_for_builtin_call (gimp
>        }
>       case BUILT_IN_MEMSET:
>       case BUILT_IN_MEMSET_CHK:
> +      case BUILT_IN_TM_MEMSET:
>        {
>          tree res = gimple_call_lhs (t);
>          tree dest = gimple_call_arg (t, 0);
> @@ -4197,6 +4200,50 @@ find_func_aliases_for_builtin_call (gimp
>            }
>          return true;
>        }
> +      CASE_BUILT_IN_TM_STORE (1):
> +      CASE_BUILT_IN_TM_STORE (2):
> +      CASE_BUILT_IN_TM_STORE (4):
> +      CASE_BUILT_IN_TM_STORE (8):
> +      CASE_BUILT_IN_TM_STORE (FLOAT):
> +      CASE_BUILT_IN_TM_STORE (DOUBLE):
> +      CASE_BUILT_IN_TM_STORE (LDOUBLE):
> +      CASE_BUILT_IN_TM_STORE (M64):
> +      CASE_BUILT_IN_TM_STORE (M128):
> +      CASE_BUILT_IN_TM_STORE (M256):
> +       {
> +         tree addr = gimple_call_arg (t, 0);
> +         tree src = gimple_call_arg (t, 1);
> +
> +         get_constraint_for (addr, &lhsc);
> +         do_deref (&lhsc);
> +         get_constraint_for (src, &rhsc);
> +         process_all_all_constraints (lhsc, rhsc);
> +         VEC_free (ce_s, heap, lhsc);
> +         VEC_free (ce_s, heap, rhsc);
> +         return true;
> +       }
> +      CASE_BUILT_IN_TM_LOAD (1):
> +      CASE_BUILT_IN_TM_LOAD (2):
> +      CASE_BUILT_IN_TM_LOAD (4):
> +      CASE_BUILT_IN_TM_LOAD (8):
> +      CASE_BUILT_IN_TM_LOAD (FLOAT):
> +      CASE_BUILT_IN_TM_LOAD (DOUBLE):
> +      CASE_BUILT_IN_TM_LOAD (LDOUBLE):
> +      CASE_BUILT_IN_TM_LOAD (M64):
> +      CASE_BUILT_IN_TM_LOAD (M128):
> +      CASE_BUILT_IN_TM_LOAD (M256):
> +        {
> +         tree dest = gimple_call_lhs (t);
> +         tree addr = gimple_call_arg (t, 0);
> +
> +         get_constraint_for (dest, &lhsc);
> +         get_constraint_for (addr, &rhsc);
> +         do_deref (&rhsc);
> +         process_all_all_constraints (lhsc, rhsc);
> +         VEC_free (ce_s, heap, lhsc);
> +         VEC_free (ce_s, heap, rhsc);
> +         return true;
> +        }
>       /* Variadic argument handling needs to be handled in IPA
>         mode as well.  */
>       case BUILT_IN_VA_START:
> Index: gcc/tree-cfg.c
> ===================================================================
> --- gcc/tree-cfg.c      (.../trunk)     (revision 180744)
> +++ gcc/tree-cfg.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -666,6 +666,15 @@ make_edges (void)
>                }
>              break;
>
> +           case GIMPLE_TRANSACTION:
> +             {
> +               tree abort_label = gimple_transaction_label (last);
> +               if (abort_label)
> +                 make_edge (bb, label_to_block (abort_label), 0);
> +               fallthru = true;
> +             }
> +             break;
> +
>            default:
>              gcc_assert (!stmt_ends_bb_p (last));
>              fallthru = true;
> @@ -1196,22 +1205,30 @@ cleanup_dead_labels (void)
>   FOR_EACH_BB (bb)
>     {
>       gimple stmt = last_stmt (bb);
> +      tree label, new_label;
> +
>       if (!stmt)
>        continue;
>
>       switch (gimple_code (stmt))
>        {
>        case GIMPLE_COND:
> -         {
> -           tree true_label = gimple_cond_true_label (stmt);
> -           tree false_label = gimple_cond_false_label (stmt);
> +         label = gimple_cond_true_label (stmt);
> +         if (label)
> +           {
> +             new_label = main_block_label (label);
> +             if (new_label != label)
> +               gimple_cond_set_true_label (stmt, new_label);
> +           }
>
> -           if (true_label)
> -             gimple_cond_set_true_label (stmt, main_block_label
> (true_label));
> -           if (false_label)
> -             gimple_cond_set_false_label (stmt, main_block_label
> (false_label));
> -           break;
> -         }
> +         label = gimple_cond_false_label (stmt);
> +         if (label)
> +           {
> +             new_label = main_block_label (label);
> +             if (new_label != label)
> +               gimple_cond_set_false_label (stmt, new_label);
> +           }
> +         break;
>
>        case GIMPLE_SWITCH:
>          {
> @@ -1221,8 +1238,10 @@ cleanup_dead_labels (void)
>            for (i = 0; i < n; ++i)
>              {
>                tree case_label = gimple_switch_label (stmt, i);
> -               tree label = main_block_label (CASE_LABEL (case_label));
> -               CASE_LABEL (case_label) = label;
> +               label = CASE_LABEL (case_label);
> +               new_label = main_block_label (label);
> +               if (new_label != label)
> +                 CASE_LABEL (case_label) = new_label;
>              }
>            break;
>          }
> @@ -1243,13 +1262,27 @@ cleanup_dead_labels (void)
>        /* We have to handle gotos until they're removed, and we don't
>           remove them until after we've created the CFG edges.  */
>        case GIMPLE_GOTO:
> -          if (!computed_goto_p (stmt))
> +         if (!computed_goto_p (stmt))
>            {
> -             tree new_dest = main_block_label (gimple_goto_dest (stmt));
> -             gimple_goto_set_dest (stmt, new_dest);
> +             label = gimple_goto_dest (stmt);
> +             new_label = main_block_label (label);
> +             if (new_label != label)
> +               gimple_goto_set_dest (stmt, new_label);

What's the reason for these changes?  Optimization?

>            }
>          break;
>
> +       case GIMPLE_TRANSACTION:
> +         {
> +           tree label = gimple_transaction_label (stmt);
> +           if (label)
> +             {
> +               tree new_label = main_block_label (label);
> +               if (new_label != label)
> +                 gimple_transaction_set_label (stmt, new_label);
> +             }
> +         }
> +         break;
> +
>        default:
>          break;
>       }
> @@ -2263,6 +2296,13 @@ is_ctrl_altering_stmt (gimple t)
>        if (flags & ECF_NORETURN)
>          return true;
>
> +       /* TM ending statements have backedges out of the transaction.
> +          Return true so we split the basic block containing
> +          them.  */
> +       if ((flags & ECF_TM_OPS)
> +           && is_tm_ending_fndecl (gimple_call_fndecl (t)))
> +         return true;
> +
>        /* BUILT_IN_RETURN call is same as return statement.  */
>        if (gimple_call_builtin_p (t, BUILT_IN_RETURN))
>          return true;
> @@ -2284,6 +2324,10 @@ is_ctrl_altering_stmt (gimple t)
>       /* OpenMP directives alter control flow.  */
>       return true;
>
> +    case GIMPLE_TRANSACTION:
> +      /* A transaction start alters control flow.  */
> +      return true;
> +
>     default:
>       break;
>     }
> @@ -4054,6 +4098,17 @@ verify_gimple_switch (gimple stmt)
>   return false;
>  }
>
> +/* Verify the contents of a GIMPLE_TRANSACTION.  Returns true if there
> +   is a problem, otherwise false.  */
> +
> +static bool
> +verify_gimple_transaction (gimple stmt)
> +{
> +  tree lab = gimple_transaction_label (stmt);
> +  if (lab != NULL && TREE_CODE (lab) != LABEL_DECL)
> +    return true;

ISTR this has substatements, so you should handle this in
verify_gimple_in_seq_2 and make sure to verify those substatements.

> +  return false;
> +}
>
>  /* Verify a gimple debug statement STMT.
>    Returns true if anything is wrong.  */
> @@ -4155,6 +4210,9 @@ verify_gimple_stmt (gimple stmt)
>     case GIMPLE_ASM:
>       return false;
>
> +    case GIMPLE_TRANSACTION:
> +      return verify_gimple_transaction (stmt);
> +

Not here.

>     /* Tuples that do not have tree operands.  */
>     case GIMPLE_NOP:
>     case GIMPLE_PREDICT:
> @@ -4271,10 +4329,19 @@ verify_gimple_in_seq_2 (gimple_seq stmts
>          err |= verify_gimple_in_seq_2 (gimple_eh_filter_failure (stmt));
>          break;
>
> +       case GIMPLE_EH_ELSE:
> +         err |= verify_gimple_in_seq_2 (gimple_eh_else_n_body (stmt));
> +         err |= verify_gimple_in_seq_2 (gimple_eh_else_e_body (stmt));
> +         break;
> +
>        case GIMPLE_CATCH:
>          err |= verify_gimple_in_seq_2 (gimple_catch_handler (stmt));
>          break;
>
> +       case GIMPLE_TRANSACTION:
> +         err |= verify_gimple_in_seq_2 (gimple_transaction_body (stmt));
> +         break;
> +

Ah, you do.  But you'll never call your label verification code.

>        default:
>          {
>            bool err2 = verify_gimple_stmt (stmt);
> @@ -5052,6 +5119,14 @@ gimple_redirect_edge_and_branch (edge e,
>        redirect_eh_dispatch_edge (stmt, e, dest);
>       break;
>
> +    case GIMPLE_TRANSACTION:
> +      /* The ABORT edge has a stored label associated with it, otherwise
> +        the edges are simply redirectable.  */
> +      /* ??? We don't really need this label after the cfg is created.  */
> +      if (e->flags == 0)
> +       gimple_transaction_set_label (stmt, gimple_block_label (dest));

So why set it (and thus keep it live)?

> +      break;
> +
>     default:
>       /* Otherwise it must be a fallthru edge, and we don't need to
>         do anything besides redirecting it.  */
> @@ -6428,8 +6503,10 @@ dump_function_to_file (tree fn, FILE *fi
>   bool ignore_topmost_bind = false, any_var = false;
>   basic_block bb;
>   tree chain;
> +  bool tmclone = TREE_CODE (fn) == FUNCTION_DECL && DECL_IS_TM_CLONE (fn);
>
> -  fprintf (file, "%s (", lang_hooks.decl_printable_name (fn, 2));
> +  fprintf (file, "%s %s(", lang_hooks.decl_printable_name (fn, 2),
> +          tmclone ? "[tm-clone] " : "");
>
>   arg = DECL_ARGUMENTS (fn);
>   while (arg)
> Index: gcc/passes.c
> ===================================================================
> --- gcc/passes.c        (.../trunk)     (revision 180744)
> +++ gcc/passes.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -1174,9 +1174,11 @@ init_optimization_passes (void)
>   p = &all_lowering_passes;
>   NEXT_PASS (pass_warn_unused_result);
>   NEXT_PASS (pass_diagnose_omp_blocks);
> +  NEXT_PASS (pass_diagnose_tm_blocks);
>   NEXT_PASS (pass_mudflap_1);
>   NEXT_PASS (pass_lower_omp);
>   NEXT_PASS (pass_lower_cf);
> +  NEXT_PASS (pass_lower_tm);
>   NEXT_PASS (pass_refactor_eh);
>   NEXT_PASS (pass_lower_eh);
>   NEXT_PASS (pass_build_cfg);
> @@ -1241,6 +1243,7 @@ init_optimization_passes (void)
>     }
>   NEXT_PASS (pass_ipa_increase_alignment);
>   NEXT_PASS (pass_ipa_matrix_reorg);
> +  NEXT_PASS (pass_ipa_tm);
>   NEXT_PASS (pass_ipa_lower_emutls);
>   *p = NULL;
>
> @@ -1400,6 +1403,13 @@ init_optimization_passes (void)
>       NEXT_PASS (pass_uncprop);
>       NEXT_PASS (pass_local_pure_const);
>     }
> +  NEXT_PASS (pass_tm_init);
> +    {
> +      struct opt_pass **p = &pass_tm_init.pass.sub;
> +      NEXT_PASS (pass_tm_mark);
> +      NEXT_PASS (pass_tm_memopt);
> +      NEXT_PASS (pass_tm_edges);
> +    }
>   NEXT_PASS (pass_lower_complex_O0);
>   NEXT_PASS (pass_cleanup_eh);
>   NEXT_PASS (pass_lower_resx);
> Index: gcc/reg-notes.def
> ===================================================================
> --- gcc/reg-notes.def   (.../trunk)     (revision 180744)
> +++ gcc/reg-notes.def   (.../branches/transactional-memory)     (revision
> 180773)
> @@ -203,6 +203,11 @@ REG_NOTE (CROSSING_JUMP)
>    functions that can return twice.  */
>  REG_NOTE (SETJMP)
>
> +/* This kind of note is generated at each transactional memory
> +   builtin, to indicate we need to generate transaction restart
> +   edges for this insn.  */
> +REG_NOTE (TM)
> +
>  /* Indicates the cumulative offset of the stack pointer accounting
>    for pushed arguments.  This will only be generated when
>    ACCUMULATE_OUTGOING_ARGS is false.  */
> Index: gcc/cfgrtl.c
> ===================================================================
> --- gcc/cfgrtl.c        (.../trunk)     (revision 180744)
> +++ gcc/cfgrtl.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -2246,6 +2246,8 @@ purge_dead_edges (basic_block bb)
>            ;
>          else if ((e->flags & EDGE_EH) && can_throw_internal (insn))
>            ;
> +         else if (flag_tm && find_reg_note (insn, REG_TM, NULL))
> +           ;
>          else
>            remove = true;
>        }
> Index: gcc/params.def
> ===================================================================
> --- gcc/params.def      (.../trunk)     (revision 180744)
> +++ gcc/params.def      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -872,6 +872,13 @@ DEFPARAM (PARAM_IPA_SRA_PTR_GROWTH_FACTO
>          "a pointer to an aggregate with",
>          2, 0, 0)
>
> +DEFPARAM (PARAM_TM_MAX_AGGREGATE_SIZE,
> +         "tm-max-aggregate-size",
> +         "Size in bytes after which thread-local aggregates should be "
> +         "instrumented with the logging functions instead of save/restore "
> +         "pairs",
> +         9, 0, 0)
> +
>  DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE,
>          "ipa-cp-value-list-size",
>          "Maximum size of a list of values associated with each parameter
> for "
>

Patch

Index: gcc/cgraph.h
===================================================================
--- gcc/cgraph.h	(.../trunk)	(revision 180744)
+++ gcc/cgraph.h	(.../branches/transactional-memory)	(revision 180773)
@@ -98,6 +98,9 @@  struct GTY(()) cgraph_local_info {
    /* True when the function has been originally extern inline, but it is
       redefined now.  */
    unsigned redefined_extern_inline : 1;
+
+  /* True if the function may enter serial irrevocable mode.  */
+  unsigned tm_may_enter_irr : 1;
  };

  /* Information about the function that needs to be computed globally
@@ -565,6 +568,8 @@  void verify_cgraph_node (struct cgraph_n
  void cgraph_build_static_cdtor (char which, tree body, int priority);
  void cgraph_reset_static_var_maps (void);
  void init_cgraph (void);
+struct cgraph_node * cgraph_copy_node_for_versioning (struct cgraph_node *,
+		tree, VEC(cgraph_edge_p,heap)*, bitmap);
  struct cgraph_node *cgraph_function_versioning (struct cgraph_node *,
  						VEC(cgraph_edge_p,heap)*,
  						VEC(ipa_replace_map_p,gc)*,
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h	(.../trunk)	(revision 180744)
+++ gcc/tree-pass.h	(.../branches/transactional-memory)	(revision 180773)
@@ -447,6 +447,12 @@  extern struct gimple_opt_pass pass_build
  extern struct gimple_opt_pass pass_local_pure_const;
  extern struct gimple_opt_pass pass_tracer;
  extern struct gimple_opt_pass pass_warn_unused_result;
+extern struct gimple_opt_pass pass_diagnose_tm_blocks;
+extern struct gimple_opt_pass pass_lower_tm;
+extern struct gimple_opt_pass pass_tm_init;
+extern struct gimple_opt_pass pass_tm_mark;
+extern struct gimple_opt_pass pass_tm_memopt;
+extern struct gimple_opt_pass pass_tm_edges;
  extern struct gimple_opt_pass pass_split_functions;
  extern struct gimple_opt_pass pass_feedback_split_functions;

@@ -469,6 +475,7 @@  extern struct ipa_opt_pass_d pass_ipa_pu
  extern struct simple_ipa_opt_pass pass_ipa_pta;
  extern struct ipa_opt_pass_d pass_ipa_lto_wpa_fixup;
  extern struct ipa_opt_pass_d pass_ipa_lto_finish_out;
+extern struct simple_ipa_opt_pass pass_ipa_tm;
  extern struct ipa_opt_pass_d pass_ipa_profile;
  extern struct ipa_opt_pass_d pass_ipa_cdtor_merge;

Index: gcc/rtlanal.c
===================================================================
--- gcc/rtlanal.c	(.../trunk)	(revision 180744)
+++ gcc/rtlanal.c	(.../branches/transactional-memory)	(revision 180773)
@@ -1918,6 +1918,7 @@  alloc_reg_note (enum reg_note kind, rtx
      case REG_CC_USER:
      case REG_LABEL_TARGET:
      case REG_LABEL_OPERAND:
+    case REG_TM:
        /* These types of register notes use an INSN_LIST rather than an
  	 EXPR_LIST, so that copying is done right and dumps look
  	 better.  */
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(.../trunk)	(revision 180744)
+++ gcc/omp-low.c	(.../branches/transactional-memory)	(revision 180773)
@@ -139,6 +139,7 @@  static tree scan_omp_1_op (tree *, int *
      case GIMPLE_TRY: \
      case GIMPLE_CATCH: \
      case GIMPLE_EH_FILTER: \
+    case GIMPLE_TRANSACTION: \
        /* The sub-statements for these should be walked.  */ \
        *handled_ops_p = false; \
        break;
Index: gcc/toplev.c
===================================================================
--- gcc/toplev.c	(.../trunk)	(revision 180744)
+++ gcc/toplev.c	(.../branches/transactional-memory)	(revision 180773)
@@ -599,6 +599,7 @@  compile_file (void)

        output_shared_constant_pool ();
        output_object_blocks ();
+  finish_tm_clone_pairs ();

        /* Write out any pending weak symbol declarations.  */
        weak_finish ();
Index: gcc/cgraphunit.c
===================================================================
--- gcc/cgraphunit.c	(.../trunk)	(revision 180744)
+++ gcc/cgraphunit.c	(.../branches/transactional-memory)	(revision 180773)
@@ -2272,7 +2272,7 @@  update_call_expr (struct cgraph_node *ne
     was copied to prevent duplications of calls that are dead
     in the clone.  */

-static struct cgraph_node *
+struct cgraph_node *
  cgraph_copy_node_for_versioning (struct cgraph_node *old_version,
  				 tree new_decl,
  				 VEC(cgraph_edge_p,heap) *redirect_callers,
@@ -2286,7 +2286,7 @@  cgraph_copy_node_for_versioning (struct

     new_version = cgraph_create_node (new_decl);

-   new_version->analyzed = true;
+   new_version->analyzed = old_version->analyzed;
     new_version->local = old_version->local;
     new_version->local.externally_visible = false;
     new_version->local.local = true;
@@ -2294,6 +2294,7 @@  cgraph_copy_node_for_versioning (struct
     new_version->rtl = old_version->rtl;
     new_version->reachable = true;
     new_version->count = old_version->count;
+   new_version->lowered = true;

     for (e = old_version->callees; e; e=e->next_callee)
       if (!bbs_to_copy
@@ -2389,7 +2390,6 @@  cgraph_function_versioning (struct cgrap
    DECL_VIRTUAL_P (new_version_node->decl) = 0;
    new_version_node->local.externally_visible = 0;
    new_version_node->local.local = 1;
-  new_version_node->lowered = true;

    /* Update the call_expr on the edges to call the new version node. */
    update_call_expr (new_version_node);
Index: gcc/tree-ssa-alias.c
===================================================================
--- gcc/tree-ssa-alias.c	(.../trunk)	(revision 180744)
+++ gcc/tree-ssa-alias.c	(.../branches/transactional-memory)	(revision 180773)
@@ -1182,6 +1182,8 @@  ref_maybe_used_by_call_p_1 (gimple call,
  	case BUILT_IN_MEMPCPY:
  	case BUILT_IN_STPCPY:
  	case BUILT_IN_STPNCPY:
+        case BUILT_IN_TM_MEMCPY:
+        case BUILT_IN_TM_MEMMOVE:
  	  {
  	    ao_ref dref;
  	    tree size = NULL_TREE;
@@ -1228,6 +1230,32 @@  ref_maybe_used_by_call_p_1 (gimple call,
  					   size);
  	    return refs_may_alias_p_1 (&dref, ref, false);
  	  }
+
+        /* The following functions read memory pointed to by their
+	   first argument.  */
+	CASE_BUILT_IN_TM_LOAD (1):
+	CASE_BUILT_IN_TM_LOAD (2):
+	CASE_BUILT_IN_TM_LOAD (4):
+	CASE_BUILT_IN_TM_LOAD (8):
+        CASE_BUILT_IN_TM_LOAD (FLOAT):
+	CASE_BUILT_IN_TM_LOAD (DOUBLE):
+	CASE_BUILT_IN_TM_LOAD (LDOUBLE):
+	CASE_BUILT_IN_TM_LOAD (M64):
+	CASE_BUILT_IN_TM_LOAD (M128):
+	CASE_BUILT_IN_TM_LOAD (M256):
+        case BUILT_IN_TM_LOG:
+        case BUILT_IN_TM_LOG_1:
+        case BUILT_IN_TM_LOG_2:
+        case BUILT_IN_TM_LOG_4:
+        case BUILT_IN_TM_LOG_8:
+        case BUILT_IN_TM_LOG_FLOAT:
+        case BUILT_IN_TM_LOG_DOUBLE:
+        case BUILT_IN_TM_LOG_LDOUBLE:
+        case BUILT_IN_TM_LOG_M64:
+        case BUILT_IN_TM_LOG_M128:
+        case BUILT_IN_TM_LOG_M256:
+	  return ptr_deref_may_alias_ref_p_1 (gimple_call_arg (call, 0), ref);
+
  	/* These read memory pointed to by the first argument.  */
  	case BUILT_IN_STRDUP:
  	case BUILT_IN_STRNDUP:
@@ -1250,6 +1278,7 @@  ref_maybe_used_by_call_p_1 (gimple call,
  	case BUILT_IN_STACK_SAVE:
  	case BUILT_IN_STACK_RESTORE:
  	case BUILT_IN_MEMSET:
+        case BUILT_IN_TM_MEMSET:
  	case BUILT_IN_MEMSET_CHK:
  	case BUILT_IN_FREXP:
  	case BUILT_IN_FREXPF:
@@ -1480,6 +1509,19 @@  call_may_clobber_ref_p_1 (gimple call, a
  	case BUILT_IN_STRCAT:
  	case BUILT_IN_STRNCAT:
  	case BUILT_IN_MEMSET:
+        case BUILT_IN_TM_MEMSET:
+        CASE_BUILT_IN_TM_STORE (1):
+        CASE_BUILT_IN_TM_STORE (2):
+        CASE_BUILT_IN_TM_STORE (4):
+        CASE_BUILT_IN_TM_STORE (8):
+        CASE_BUILT_IN_TM_STORE (FLOAT):
+        CASE_BUILT_IN_TM_STORE (DOUBLE):
+        CASE_BUILT_IN_TM_STORE (LDOUBLE):
+        CASE_BUILT_IN_TM_STORE (M64):
+        CASE_BUILT_IN_TM_STORE (M128):
+        CASE_BUILT_IN_TM_STORE (M256):
+        case BUILT_IN_TM_MEMCPY:
+        case BUILT_IN_TM_MEMMOVE:
  	  {
  	    ao_ref dref;
  	    tree size = NULL_TREE;
Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c	(.../trunk)	(revision 180744)
+++ gcc/ipa-inline.c	(.../branches/transactional-memory)	(revision 180773)
@@ -284,6 +284,15 @@  can_inline_edge_p (struct cgraph_edge *e
        e->inline_failed = CIF_EH_PERSONALITY;
        inlinable = false;
      }
+  /* TM pure functions should not get inlined if the outer function is
+     a TM safe function.  */
+  else if (flag_tm
+	   && is_tm_pure (callee->decl)
+	   && is_tm_safe (e->caller->decl))
+    {
+      e->inline_failed = CIF_UNSPECIFIED;
+      inlinable = false;
+    }
    /* Don't inline if the callee can throw non-call exceptions but the
       caller cannot.
       FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is 
missing.
Index: gcc/crtstuff.c
===================================================================
--- gcc/crtstuff.c	(.../trunk)	(revision 180744)
+++ gcc/crtstuff.c	(.../branches/transactional-memory)	(revision 180773)
@@ -162,6 +162,9 @@  extern void __do_global_ctors_1 (void);
  /* Likewise for _Jv_RegisterClasses.  */
  extern void _Jv_RegisterClasses (void *) TARGET_ATTRIBUTE_WEAK;

+extern void _ITM_registerTMCloneTable (void *, size_t) 
TARGET_ATTRIBUTE_WEAK;
+extern void _ITM_deregisterTMCloneTable (void *) TARGET_ATTRIBUTE_WEAK;
+
  #ifdef OBJECT_FORMAT_ELF

  /*  Declare a pointer to void function type.  */
@@ -241,6 +244,11 @@  STATIC void *__JCR_LIST__[]
    = { };
  #endif /* JCR_SECTION_NAME */

+STATIC func_ptr __TMC_LIST__[]
+  __attribute__((unused, section(".tm_clone_table"), 
aligned(sizeof(void*))))
+  = { };
+extern func_ptr __TMC_END__[] __attribute__((__visibility__ ("hidden")));
+
  #if defined(INIT_SECTION_ASM_OP) || defined(INIT_ARRAY_SECTION_ASM_OP)

  #ifdef OBJECT_FORMAT_ELF
@@ -330,6 +338,13 @@  __do_global_dtors_aux (void)
    }
  #endif /* !defined(FINI_ARRAY_SECTION_ASM_OP) */

+  if (_ITM_deregisterTMCloneTable)
+    {
+      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
+      if (size > 0)
+	_ITM_deregisterTMCloneTable (__TMC_LIST__);
+    }
+
  #ifdef USE_EH_FRAME_REGISTRY
  #ifdef CRT_GET_RFIB_DATA
    /* If we used the new __register_frame_info_bases interface,
@@ -391,6 +406,12 @@  frame_dummy (void)
  	register_classes (__JCR_LIST__);
      }
  #endif /* JCR_SECTION_NAME */
+  if (_ITM_registerTMCloneTable)
+    {
+      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
+      if (size > 0)
+	_ITM_registerTMCloneTable (__TMC_LIST__, size);
+    }
  }

  #ifdef INIT_SECTION_ASM_OP
@@ -457,6 +478,13 @@  __do_global_dtors (void)
    for (p = __DTOR_LIST__ + 1; (f = *p); p++)
      f ();

+  if (_ITM_deregisterTMCloneTable)
+    {
+      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
+      if (size > 0)
+	_ITM_deregisterTMCloneTable (__TMC_LIST__);
+    }
+
  #ifdef USE_EH_FRAME_REGISTRY
    if (__deregister_frame_info)
      __deregister_frame_info (__EH_FRAME_BEGIN__);
@@ -570,6 +598,11 @@  STATIC void *__JCR_END__[1]
     = { 0 };
  #endif /* JCR_SECTION_NAME */

+func_ptr __TMC_END__[]
+  __attribute__((unused, section(".tm_clone_table"), 
aligned(sizeof(void *)),
+		 __visibility__ ("hidden")))
+  = { };
+
  #ifdef INIT_ARRAY_SECTION_ASM_OP

  /* If we are using .init_array, there is nothing to do.  */
Index: gcc/cfgbuild.c
===================================================================
--- gcc/cfgbuild.c	(.../trunk)	(revision 180744)
+++ gcc/cfgbuild.c	(.../branches/transactional-memory)	(revision 180773)
@@ -338,18 +338,30 @@  make_edges (basic_block min, basic_block
  	  /* Add any appropriate EH edges.  */
  	  rtl_make_eh_edge (edge_cache, bb, insn);

-	  if (code == CALL_INSN && nonlocal_goto_handler_labels)
+	  if (code == CALL_INSN)
  	    {
-	      /* ??? This could be made smarter: in some cases it's possible
-		 to tell that certain calls will not do a nonlocal goto.
-		 For example, if the nested functions that do the nonlocal
-		 gotos do not have their addresses taken, then only calls to
-		 those functions or to other nested functions that use them
-		 could possibly do nonlocal gotos.  */
  	      if (can_nonlocal_goto (insn))
-		for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
-		  make_label_edge (edge_cache, bb, XEXP (x, 0),
-				   EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
+		{
+		  /* ??? This could be made smarter: in some cases it's
+		     possible to tell that certain calls will not do a
+		     nonlocal goto.  For example, if the nested functions
+		     that do the nonlocal gotos do not have their addresses
+		     taken, then only calls to those functions or to other
+		     nested functions that use them could possibly do
+		     nonlocal gotos.  */
+		  for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
+		    make_label_edge (edge_cache, bb, XEXP (x, 0),
+				     EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
+		}
+
+	      if (flag_tm)
+		{
+		  rtx note;
+		  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+		    if (REG_NOTE_KIND (note) == REG_TM)
+		      make_label_edge (edge_cache, bb, XEXP (note, 0),
+				       EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
+		}
  	    }
  	}

Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def	(.../trunk)	(revision 180744)
+++ gcc/timevar.def	(.../branches/transactional-memory)	(revision 180773)
@@ -184,6 +184,7 @@  DEFTIMEVAR (TV_TREE_COPY_RENAME	     , "
  DEFTIMEVAR (TV_TREE_SSA_VERIFY       , "tree SSA verifier")
  DEFTIMEVAR (TV_TREE_STMT_VERIFY      , "tree STMT verifier")
  DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch initialization 
conversion")
+DEFTIMEVAR (TV_TRANS_MEM             , "transactional memory")
  DEFTIMEVAR (TV_TREE_STRLEN           , "tree strlen optimization")
  DEFTIMEVAR (TV_CGRAPH_VERIFY         , "callgraph verifier")
  DEFTIMEVAR (TV_DOM_FRONTIERS         , "dominance frontiers")
Index: gcc/recog.c
===================================================================
--- gcc/recog.c	(.../trunk)	(revision 180744)
+++ gcc/recog.c	(.../branches/transactional-memory)	(revision 180773)
@@ -3287,6 +3287,7 @@  peep2_attempt (basic_block bb, rtx insn,
  	  {
  	  case REG_NORETURN:
  	  case REG_SETJMP:
+	  case REG_TM:
  	    add_reg_note (new_insn, REG_NOTE_KIND (note),
  			  XEXP (note, 0));
  	    break;
Index: gcc/function.h
===================================================================
--- gcc/function.h	(.../trunk)	(revision 180744)
+++ gcc/function.h	(.../branches/transactional-memory)	(revision 180773)
@@ -467,6 +467,14 @@  extern GTY(()) struct rtl_data x_rtl;
     want to do differently.  */
  #define crtl (&x_rtl)

+/* This structure is used to map a gimple statement to a label,
+   or list of labels to represent transaction restart.  */
+
+struct GTY(()) tm_restart_node {
+  gimple stmt;
+  tree label_or_list;
+};
+
  struct GTY(()) stack_usage
  {
    /* # of bytes of static stack space allocated by the function.  */
@@ -518,6 +526,10 @@  struct GTY(()) function {
    /* Value histograms attached to particular statements.  */
    htab_t GTY((skip)) value_histograms;

+  /* Map gimple stmt to tree label (or list of labels) for transaction
+     restart and abort.  */
+  htab_t GTY ((param_is (struct tm_restart_node))) tm_restart;
+
    /* For function.c.  */

    /* Points to the FUNCTION_DECL of this function.  */
Index: gcc/emit-rtl.c
===================================================================
--- gcc/emit-rtl.c	(.../trunk)	(revision 180744)
+++ gcc/emit-rtl.c	(.../branches/transactional-memory)	(revision 180773)
@@ -3595,6 +3595,7 @@  try_split (rtx pat, rtx trial, int last)

  	case REG_NORETURN:
  	case REG_SETJMP:
+	case REG_TM:
  	  for (insn = insn_last; insn != NULL_RTX; insn = PREV_INSN (insn))
  	    {
  	      if (CALL_P (insn))
Index: gcc/cfgexpand.c
===================================================================
--- gcc/cfgexpand.c	(.../trunk)	(revision 180744)
+++ gcc/cfgexpand.c	(.../branches/transactional-memory)	(revision 180773)
@@ -2096,6 +2096,32 @@  expand_gimple_stmt (gimple stmt)
  	}
      }

+  /* Mark all calls that can have a transaction restart.  */
+  if (cfun->tm_restart && is_gimple_call (stmt))
+    {
+      struct tm_restart_node dummy;
+      void **slot;
+
+      dummy.stmt = stmt;
+      slot = htab_find_slot (cfun->tm_restart, &dummy, NO_INSERT);
+      if (slot)
+	{
+	  struct tm_restart_node *n = (struct tm_restart_node *) *slot;
+	  tree list = n->label_or_list;
+	  rtx insn;
+
+	  for (insn = next_real_insn (last); !CALL_P (insn);
+	       insn = next_real_insn (insn))
+	    continue;
+
+	  if (TREE_CODE (list) == LABEL_DECL)
+	    add_reg_note (insn, REG_TM, label_rtx (list));
+	  else
+	    for (; list ; list = TREE_CHAIN (list))
+	      add_reg_note (insn, REG_TM, label_rtx (TREE_VALUE (list)));
+	}
+    }
+
    return last;
  }

@@ -4455,6 +4481,10 @@  gimple_expand_cfg (void)
    /* After expanding, the return labels are no longer needed. */
    return_label = NULL;
    naked_return_label = NULL;
+
+  /* After expanding, the tm_restart map is no longer needed.  */
+  cfun->tm_restart = NULL;
+
    /* Tag the blocks with a depth number so that change_scope can find
       the common parent easily.  */
    set_block_levels (DECL_INITIAL (cfun->decl), 0);
Index: gcc/varasm.c
===================================================================
--- gcc/varasm.c	(.../trunk)	(revision 180744)
+++ gcc/varasm.c	(.../branches/transactional-memory)	(revision 180773)
@@ -5859,6 +5859,103 @@  assemble_alias (tree decl, tree target)
      }
  }

+/* Record and output a table of translations from original function
+   to its transaction aware clone.  Note that tm_pure functions are
+   considered to be their own clone.  */
+
+static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
+     htab_t tm_clone_pairs;
+
+void
+record_tm_clone_pair (tree o, tree n)
+{
+  struct tree_map **slot, *h;
+
+  if (tm_clone_pairs == NULL)
+    tm_clone_pairs = htab_create_ggc (32, tree_map_hash, tree_map_eq, 0);
+
+  h = ggc_alloc_tree_map ();
+  h->hash = htab_hash_pointer (o);
+  h->base.from = o;
+  h->to = n;
+
+  slot = (struct tree_map **)
+    htab_find_slot_with_hash (tm_clone_pairs, h, h->hash, INSERT);
+  *slot = h;
+}
+
+tree
+get_tm_clone_pair (tree o)
+{
+  if (tm_clone_pairs)
+    {
+      struct tree_map *h, in;
+
+      in.base.from = o;
+      in.hash = htab_hash_pointer (o);
+      h = (struct tree_map *) htab_find_with_hash (tm_clone_pairs,
+						   &in, in.hash);
+      if (h)
+	return h->to;
+    }
+  return NULL_TREE;
+}
+
+/* Helper function for finish_tm_clone_pairs.  Dump the clone table.  */
+
+int
+finish_tm_clone_pairs_1 (void **slot, void *info ATTRIBUTE_UNUSED)
+{
+  struct tree_map *map = (struct tree_map *) *slot;
+  bool *switched = (bool *) info;
+  tree src = map->base.from;
+  tree dst = map->to;
+  struct cgraph_node *src_n = cgraph_get_node (src);
+  struct cgraph_node *dst_n = cgraph_get_node (dst);
+
+  /* The function ipa_tm_create_version() marks the clone as needed if
+     the original function was needed.  But we also mark the clone as
+     needed if we ever called the clone indirectly through
+     TM_GETTMCLONE.  If neither of these are true, we didn't generate
+     a clone, and we didn't call it indirectly... no sense keeping it
+     in the clone table.  */
+  if (!dst_n || !dst_n->needed)
+    return 1;
+
+  /* This covers the case where we have optimized the original
+     function away, and only access the transactional clone.  */
+  if (!src_n || !src_n->needed)
+    return 1;
+
+  if (!*switched)
+    {
+      switch_to_section (get_named_section (NULL, ".tm_clone_table", 3));
+      assemble_align (POINTER_SIZE);
+      *switched = true;
+    }
+
+  assemble_integer (XEXP (DECL_RTL (src), 0),
+		    POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+  assemble_integer (XEXP (DECL_RTL (dst), 0),
+		    POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+  return 1;
+}
+
+void
+finish_tm_clone_pairs (void)
+{
+  bool switched = false;
+
+  if (tm_clone_pairs == NULL)
+    return;
+
+  htab_traverse_noresize (tm_clone_pairs, finish_tm_clone_pairs_1,
+			  (void *) &switched);
+  htab_delete (tm_clone_pairs);
+  tm_clone_pairs = NULL;
+}
+
+
  /* Emit an assembler directive to set symbol for DECL visibility to
     the visibility type VIS, which must not be VISIBILITY_DEFAULT.  */

Index: gcc/output.h
===================================================================
--- gcc/output.h	(.../trunk)	(revision 180744)
+++ gcc/output.h	(.../branches/transactional-memory)	(revision 180773)
@@ -606,6 +606,11 @@  extern bool unlikely_text_section_p (sec
  extern void switch_to_section (section *);
  extern void output_section_asm_op (const void *);

+extern void record_tm_clone_pair (tree, tree);
+extern void finish_tm_clone_pairs (void);
+extern int finish_tm_clone_pairs_1 (void **, void *);
+extern tree get_tm_clone_pair (tree);
+
  extern void default_asm_output_source_filename (FILE *, const char *);
  extern void output_file_directive (FILE *, const char *);

Index: gcc/combine.c
===================================================================
--- gcc/combine.c	(.../trunk)	(revision 180744)
+++ gcc/combine.c	(.../branches/transactional-memory)	(revision 180773)
@@ -13286,6 +13286,7 @@  distribute_notes (rtx notes, rtx from_in

  	case REG_NORETURN:
  	case REG_SETJMP:
+	case REG_TM:
  	  /* These notes must remain with the call.  It should not be
  	     possible for both I2 and I3 to be a call.  */
  	  if (CALL_P (i3))
Index: gcc/tree-flow.h
===================================================================
--- gcc/tree-flow.h	(.../trunk)	(revision 180744)
+++ gcc/tree-flow.h	(.../branches/transactional-memory)	(revision 180773)
@@ -778,6 +778,9 @@  extern bool maybe_duplicate_eh_stmt (gim
  extern bool verify_eh_edges (gimple);
  extern bool verify_eh_dispatch_edge (gimple);

+/* In gtm-low.c  */
+extern bool is_transactional_stmt (const_gimple);
+
  /* In tree-ssa-pre.c  */
  struct pre_expr_d;
  void add_to_value (unsigned int, struct pre_expr_d *);
Index: gcc/tree-ssa-structalias.c
===================================================================
--- gcc/tree-ssa-structalias.c	(.../trunk)	(revision 180744)
+++ gcc/tree-ssa-structalias.c	(.../branches/transactional-memory)	(revision 180773)
@@ -4024,6 +4024,8 @@  find_func_aliases_for_builtin_call (gimp
        case BUILT_IN_STPCPY_CHK:
        case BUILT_IN_STRCAT_CHK:
        case BUILT_IN_STRNCAT_CHK:
+      case BUILT_IN_TM_MEMCPY:
+      case BUILT_IN_TM_MEMMOVE:
  	{
  	  tree res = gimple_call_lhs (t);
  	  tree dest = gimple_call_arg (t, (DECL_FUNCTION_CODE (fndecl)
@@ -4056,6 +4058,7 @@  find_func_aliases_for_builtin_call (gimp
  	}
        case BUILT_IN_MEMSET:
        case BUILT_IN_MEMSET_CHK:
+      case BUILT_IN_TM_MEMSET:
  	{
  	  tree res = gimple_call_lhs (t);
  	  tree dest = gimple_call_arg (t, 0);
@@ -4197,6 +4200,50 @@  find_func_aliases_for_builtin_call (gimp
  	    }
  	  return true;
  	}
+      CASE_BUILT_IN_TM_STORE (1):
+      CASE_BUILT_IN_TM_STORE (2):
+      CASE_BUILT_IN_TM_STORE (4):
+      CASE_BUILT_IN_TM_STORE (8):
+      CASE_BUILT_IN_TM_STORE (FLOAT):
+      CASE_BUILT_IN_TM_STORE (DOUBLE):
+      CASE_BUILT_IN_TM_STORE (LDOUBLE):
+      CASE_BUILT_IN_TM_STORE (M64):
+      CASE_BUILT_IN_TM_STORE (M128):
+      CASE_BUILT_IN_TM_STORE (M256):
+	{
+	  tree addr = gimple_call_arg (t, 0);
+	  tree src = gimple_call_arg (t, 1);
+
+	  get_constraint_for (addr, &lhsc);
+	  do_deref (&lhsc);
+	  get_constraint_for (src, &rhsc);
+	  process_all_all_constraints (lhsc, rhsc);
+	  VEC_free (ce_s, heap, lhsc);
+	  VEC_free (ce_s, heap, rhsc);
+	  return true;
+	}
+      CASE_BUILT_IN_TM_LOAD (1):
+      CASE_BUILT_IN_TM_LOAD (2):
+      CASE_BUILT_IN_TM_LOAD (4):
+      CASE_BUILT_IN_TM_LOAD (8):
+      CASE_BUILT_IN_TM_LOAD (FLOAT):
+      CASE_BUILT_IN_TM_LOAD (DOUBLE):
+      CASE_BUILT_IN_TM_LOAD (LDOUBLE):
+      CASE_BUILT_IN_TM_LOAD (M64):
+      CASE_BUILT_IN_TM_LOAD (M128):
+      CASE_BUILT_IN_TM_LOAD (M256):
+        {
+	  tree dest = gimple_call_lhs (t);
+	  tree addr = gimple_call_arg (t, 0);
+
+	  get_constraint_for (dest, &lhsc);
+	  get_constraint_for (addr, &rhsc);
+	  do_deref (&rhsc);
+	  process_all_all_constraints (lhsc, rhsc);
+	  VEC_free (ce_s, heap, lhsc);
+	  VEC_free (ce_s, heap, rhsc);
+	  return true;
+        }
        /* Variadic argument handling needs to be handled in IPA
  	 mode as well.  */
        case BUILT_IN_VA_START:
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(.../trunk)	(revision 180744)
+++ gcc/tree-cfg.c	(.../branches/transactional-memory)	(revision 180773)
@@ -666,6 +666,15 @@  make_edges (void)
  		}
  	      break;

+	    case GIMPLE_TRANSACTION:
+	      {
+		tree abort_label = gimple_transaction_label (last);
+		if (abort_label)
+		  make_edge (bb, label_to_block (abort_label), 0);
+		fallthru = true;
+	      }
+	      break;
+
  	    default:
  	      gcc_assert (!stmt_ends_bb_p (last));
  	      fallthru = true;
@@ -1196,22 +1205,30 @@  cleanup_dead_labels (void)
    FOR_EACH_BB (bb)
      {
        gimple stmt = last_stmt (bb);
+      tree label, new_label;
+
        if (!stmt)
  	continue;

        switch (gimple_code (stmt))
  	{
  	case GIMPLE_COND:
-	  {
-	    tree true_label = gimple_cond_true_label (stmt);
-	    tree false_label = gimple_cond_false_label (stmt);
+	  label = gimple_cond_true_label (stmt);
+	  if (label)
+	    {
+	      new_label = main_block_label (label);
+	      if (new_label != label)
+		gimple_cond_set_true_label (stmt, new_label);
+	    }

-	    if (true_label)
-	      gimple_cond_set_true_label (stmt, main_block_label (true_label));
-	    if (false_label)
-	      gimple_cond_set_false_label (stmt, main_block_label (false_label));
-	    break;
-	  }
+	  label = gimple_cond_false_label (stmt);
+	  if (label)
+	    {
+	      new_label = main_block_label (label);
+	      if (new_label != label)
+		gimple_cond_set_false_label (stmt, new_label);
+	    }
+	  break;

  	case GIMPLE_SWITCH:
  	  {
@@ -1221,8 +1238,10 @@  cleanup_dead_labels (void)
  	    for (i = 0; i < n; ++i)
  	      {
  		tree case_label = gimple_switch_label (stmt, i);
-		tree label = main_block_label (CASE_LABEL (case_label));
-		CASE_LABEL (case_label) = label;
+		label = CASE_LABEL (case_label);
+		new_label = main_block_label (label);
+		if (new_label != label)
+		  CASE_LABEL (case_label) = new_label;
  	      }
  	    break;
  	  }
@@ -1243,13 +1262,27 @@  cleanup_dead_labels (void)
  	/* We have to handle gotos until they're removed, and we don't
  	   remove them until after we've created the CFG edges.  */
  	case GIMPLE_GOTO:
-          if (!computed_goto_p (stmt))
+	  if (!computed_goto_p (stmt))
  	    {
-	      tree new_dest = main_block_label (gimple_goto_dest (stmt));
-	      gimple_goto_set_dest (stmt, new_dest);
+	      label = gimple_goto_dest (stmt);
+	      new_label = main_block_label (label);
+	      if (new_label != label)
+		gimple_goto_set_dest (stmt, new_label);
  	    }
  	  break;

+	case GIMPLE_TRANSACTION:
+	  /* Swap in whatever main_block_label returns, reusing the
+	     LABEL and NEW_LABEL temporaries as the other cases do.  */
+	  label = gimple_transaction_label (stmt);
+	  if (label)
+	    {
+	      new_label = main_block_label (label);
+	      if (new_label != label)
+		gimple_transaction_set_label (stmt, new_label);
+	    }
+	  break;
+
  	default:
  	  break;
        }
@@ -2263,6 +2296,13 @@  is_ctrl_altering_stmt (gimple t)
  	if (flags & ECF_NORETURN)
  	  return true;

+	/* TM ending statements have backedges out of the transaction.
+	   Return true so we split the basic block containing
+	   them.  */
+	if ((flags & ECF_TM_OPS)
+	    && is_tm_ending_fndecl (gimple_call_fndecl (t)))
+	  return true;
+
  	/* BUILT_IN_RETURN call is same as return statement.  */
  	if (gimple_call_builtin_p (t, BUILT_IN_RETURN))
  	  return true;
@@ -2284,6 +2324,10 @@  is_ctrl_altering_stmt (gimple t)
        /* OpenMP directives alter control flow.  */
        return true;

+    case GIMPLE_TRANSACTION:
+      /* A transaction start alters control flow.  */
+      return true;
+
      default:
        break;
      }
@@ -4054,6 +4098,17 @@  verify_gimple_switch (gimple stmt)
    return false;
  }

+/* Check a GIMPLE_TRANSACTION statement STMT.  Its label, when one is
+   present, must be a LABEL_DECL.  Returns true if anything is wrong.  */
+
+static bool
+verify_gimple_transaction (gimple stmt)
+{
+  tree abort_label = gimple_transaction_label (stmt);
+
+  /* A missing label is acceptable; any other tree code is an error.  */
+  return abort_label != NULL && TREE_CODE (abort_label) != LABEL_DECL;
+}

  /* Verify a gimple debug statement STMT.
     Returns true if anything is wrong.  */
@@ -4155,6 +4210,9 @@  verify_gimple_stmt (gimple stmt)
      case GIMPLE_ASM:
        return false;

+    case GIMPLE_TRANSACTION:
+      return verify_gimple_transaction (stmt);
+
      /* Tuples that do not have tree operands.  */
      case GIMPLE_NOP:
      case GIMPLE_PREDICT:
@@ -4271,10 +4329,19 @@  verify_gimple_in_seq_2 (gimple_seq stmts
  	  err |= verify_gimple_in_seq_2 (gimple_eh_filter_failure (stmt));
  	  break;

+	case GIMPLE_EH_ELSE:
+	  err |= verify_gimple_in_seq_2 (gimple_eh_else_n_body (stmt));
+	  err |= verify_gimple_in_seq_2 (gimple_eh_else_e_body (stmt));
+	  break;
+
  	case GIMPLE_CATCH:
  	  err |= verify_gimple_in_seq_2 (gimple_catch_handler (stmt));
  	  break;

+	case GIMPLE_TRANSACTION:
+	  err |= verify_gimple_in_seq_2 (gimple_transaction_body (stmt));
+	  break;
+
  	default:
  	  {
  	    bool err2 = verify_gimple_stmt (stmt);
@@ -5052,6 +5119,14 @@  gimple_redirect_edge_and_branch (edge e,
  	redirect_eh_dispatch_edge (stmt, e, dest);
        break;

+    case GIMPLE_TRANSACTION:
+      /* The ABORT edge has a stored label associated with it, otherwise
+	 the edges are simply redirectable.  */
+      /* ??? We don't really need this label after the cfg is created.  */
+      if (e->flags == 0)
+	gimple_transaction_set_label (stmt, gimple_block_label (dest));
+      break;
+
      default:
        /* Otherwise it must be a fallthru edge, and we don't need to
  	 do anything besides redirecting it.  */
@@ -6428,8 +6503,10 @@  dump_function_to_file (tree fn, FILE *fi
    bool ignore_topmost_bind = false, any_var = false;
    basic_block bb;
    tree chain;
+  bool tmclone = TREE_CODE (fn) == FUNCTION_DECL && DECL_IS_TM_CLONE (fn);

-  fprintf (file, "%s (", lang_hooks.decl_printable_name (fn, 2));
+  fprintf (file, "%s %s(", lang_hooks.decl_printable_name (fn, 2),
+	   tmclone ? "[tm-clone] " : "");

    arg = DECL_ARGUMENTS (fn);
    while (arg)
Index: gcc/passes.c
===================================================================
--- gcc/passes.c	(.../trunk)	(revision 180744)
+++ gcc/passes.c	(.../branches/transactional-memory)	(revision 180773)
@@ -1174,9 +1174,11 @@  init_optimization_passes (void)
    p = &all_lowering_passes;
    NEXT_PASS (pass_warn_unused_result);
    NEXT_PASS (pass_diagnose_omp_blocks);
+  NEXT_PASS (pass_diagnose_tm_blocks);
    NEXT_PASS (pass_mudflap_1);
    NEXT_PASS (pass_lower_omp);
    NEXT_PASS (pass_lower_cf);
+  NEXT_PASS (pass_lower_tm);
    NEXT_PASS (pass_refactor_eh);
    NEXT_PASS (pass_lower_eh);
    NEXT_PASS (pass_build_cfg);
@@ -1241,6 +1243,7 @@  init_optimization_passes (void)
      }
    NEXT_PASS (pass_ipa_increase_alignment);
    NEXT_PASS (pass_ipa_matrix_reorg);
+  NEXT_PASS (pass_ipa_tm);
    NEXT_PASS (pass_ipa_lower_emutls);
    *p = NULL;

@@ -1400,6 +1403,13 @@  init_optimization_passes (void)
        NEXT_PASS (pass_uncprop);
        NEXT_PASS (pass_local_pure_const);
      }
+  NEXT_PASS (pass_tm_init);
+    {
+      struct opt_pass **p = &pass_tm_init.pass.sub;
+      NEXT_PASS (pass_tm_mark);
+      NEXT_PASS (pass_tm_memopt);
+      NEXT_PASS (pass_tm_edges);
+    }
    NEXT_PASS (pass_lower_complex_O0);
    NEXT_PASS (pass_cleanup_eh);
    NEXT_PASS (pass_lower_resx);
Index: gcc/reg-notes.def
===================================================================
--- gcc/reg-notes.def	(.../trunk)	(revision 180744)
+++ gcc/reg-notes.def	(.../branches/transactional-memory)	(revision 180773)
@@ -203,6 +203,11 @@  REG_NOTE (CROSSING_JUMP)
     functions that can return twice.  */
  REG_NOTE (SETJMP)

+/* This kind of note is generated at each transactional memory
+   builtin, to indicate we need to generate transaction restart
+   edges for this insn.  */
+REG_NOTE (TM)
+
  /* Indicates the cumulative offset of the stack pointer accounting
     for pushed arguments.  This will only be generated when
     ACCUMULATE_OUTGOING_ARGS is false.  */
Index: gcc/cfgrtl.c
===================================================================
--- gcc/cfgrtl.c	(.../trunk)	(revision 180744)
+++ gcc/cfgrtl.c	(.../branches/transactional-memory)	(revision 180773)
@@ -2246,6 +2246,8 @@  purge_dead_edges (basic_block bb)
  	    ;
  	  else if ((e->flags & EDGE_EH) && can_throw_internal (insn))
  	    ;
+	  else if (flag_tm && find_reg_note (insn, REG_TM, NULL))
+	    ;
  	  else
  	    remove = true;
  	}
Index: gcc/params.def
===================================================================
--- gcc/params.def	(.../trunk)	(revision 180744)
+++ gcc/params.def	(.../branches/transactional-memory)	(revision 180773)
@@ -872,6 +872,13 @@  DEFPARAM (PARAM_IPA_SRA_PTR_GROWTH_FACTO
  	  "a pointer to an aggregate with",
  	  2, 0, 0)

+DEFPARAM (PARAM_TM_MAX_AGGREGATE_SIZE,
+	  "tm-max-aggregate-size",
+	  "Size in bytes after which thread-local aggregates should be "
+	  "instrumented with the logging functions instead of save/restore "
+	  "pairs",
+	  9, 0, 0)
+
  DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE,
  	  "ipa-cp-value-list-size",
  	  "Maximum size of a list of values associated with each parameter for "