19/n: trans-mem: middle end/misc patches (LAST PATCH)

Submitted by Aldy Hernandez on Nov. 3, 2011, 7:32 p.m.

Details

Message ID 4EB2EC3F.6000908@redhat.com
State New
Headers show

Commit Message

Aldy Hernandez Nov. 3, 2011, 7:32 p.m.
This is everything else that doesn't fit neatly into any other category. 
  Here are the middle end changes, as well as pass ordering code, along 
with varasm and a potpourri of other small changes.

This is the last patch.  Please let me know if there is anything else 
(reasonable) you would like me to post.

Comments

Richard Guenther Nov. 4, 2011, 11:14 a.m.
On Thu, Nov 3, 2011 at 8:32 PM, Aldy Hernandez <aldyh@redhat.com> wrote:
> This is everything else that doesn't fit neatly into any other category.
>  Here are the middle end changes, as well as pass ordering code, along with
> varasm and a potpourri of other small changes.
>
> This is the last patch.  Please let me know if there is anything else
> (reasonable) you would like me to post.
>
> Index: gcc/cgraph.h
> ===================================================================
> --- gcc/cgraph.h        (.../trunk)     (revision 180744)
> +++ gcc/cgraph.h        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -98,6 +98,9 @@ struct GTY(()) cgraph_local_info {
>   /* True when the function has been originally extern inline, but it is
>      redefined now.  */
>   unsigned redefined_extern_inline : 1;
> +
> +  /* True if the function may enter serial irrevocable mode.  */
> +  unsigned tm_may_enter_irr : 1;
>  };
>
>  /* Information about the function that needs to be computed globally
> @@ -565,6 +568,8 @@ void verify_cgraph_node (struct cgraph_n
>  void cgraph_build_static_cdtor (char which, tree body, int priority);
>  void cgraph_reset_static_var_maps (void);
>  void init_cgraph (void);
> +struct cgraph_node * cgraph_copy_node_for_versioning (struct cgraph_node *,
> +               tree, VEC(cgraph_edge_p,heap)*, bitmap);
>  struct cgraph_node *cgraph_function_versioning (struct cgraph_node *,
>                                                VEC(cgraph_edge_p,heap)*,
>                                                VEC(ipa_replace_map_p,gc)*,
> Index: gcc/tree-pass.h
> ===================================================================
> --- gcc/tree-pass.h     (.../trunk)     (revision 180744)
> +++ gcc/tree-pass.h     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -447,6 +447,12 @@ extern struct gimple_opt_pass pass_build
>  extern struct gimple_opt_pass pass_local_pure_const;
>  extern struct gimple_opt_pass pass_tracer;
>  extern struct gimple_opt_pass pass_warn_unused_result;
> +extern struct gimple_opt_pass pass_diagnose_tm_blocks;
> +extern struct gimple_opt_pass pass_lower_tm;
> +extern struct gimple_opt_pass pass_tm_init;
> +extern struct gimple_opt_pass pass_tm_mark;
> +extern struct gimple_opt_pass pass_tm_memopt;
> +extern struct gimple_opt_pass pass_tm_edges;
>  extern struct gimple_opt_pass pass_split_functions;
>  extern struct gimple_opt_pass pass_feedback_split_functions;
>
> @@ -469,6 +475,7 @@ extern struct ipa_opt_pass_d pass_ipa_pu
>  extern struct simple_ipa_opt_pass pass_ipa_pta;
>  extern struct ipa_opt_pass_d pass_ipa_lto_wpa_fixup;
>  extern struct ipa_opt_pass_d pass_ipa_lto_finish_out;
> +extern struct simple_ipa_opt_pass pass_ipa_tm;
>  extern struct ipa_opt_pass_d pass_ipa_profile;
>  extern struct ipa_opt_pass_d pass_ipa_cdtor_merge;
>
> Index: gcc/rtlanal.c
> ===================================================================
> --- gcc/rtlanal.c       (.../trunk)     (revision 180744)
> +++ gcc/rtlanal.c       (.../branches/transactional-memory)     (revision
> 180773)
> @@ -1918,6 +1918,7 @@ alloc_reg_note (enum reg_note kind, rtx
>     case REG_CC_USER:
>     case REG_LABEL_TARGET:
>     case REG_LABEL_OPERAND:
> +    case REG_TM:
>       /* These types of register notes use an INSN_LIST rather than an
>         EXPR_LIST, so that copying is done right and dumps look
>         better.  */
> Index: gcc/omp-low.c
> ===================================================================
> --- gcc/omp-low.c       (.../trunk)     (revision 180744)
> +++ gcc/omp-low.c       (.../branches/transactional-memory)     (revision
> 180773)
> @@ -139,6 +139,7 @@ static tree scan_omp_1_op (tree *, int *
>     case GIMPLE_TRY: \
>     case GIMPLE_CATCH: \
>     case GIMPLE_EH_FILTER: \
> +    case GIMPLE_TRANSACTION: \
>       /* The sub-statements for these should be walked.  */ \
>       *handled_ops_p = false; \
>       break;
> Index: gcc/toplev.c
> ===================================================================
> --- gcc/toplev.c        (.../trunk)     (revision 180744)
> +++ gcc/toplev.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -599,6 +599,7 @@ compile_file (void)
>
>       output_shared_constant_pool ();
>       output_object_blocks ();
> +  finish_tm_clone_pairs ();
>       /* Write out any pending weak symbol declarations.  */
>       weak_finish ();
> Index: gcc/cgraphunit.c
> ===================================================================
> --- gcc/cgraphunit.c    (.../trunk)     (revision 180744)
> +++ gcc/cgraphunit.c    (.../branches/transactional-memory)     (revision
> 180773)
> @@ -2272,7 +2272,7 @@ update_call_expr (struct cgraph_node *ne
>    was copied to prevent duplications of calls that are dead
>    in the clone.  */
>
> -static struct cgraph_node *
> +struct cgraph_node *
>  cgraph_copy_node_for_versioning (struct cgraph_node *old_version,
>                                 tree new_decl,
>                                 VEC(cgraph_edge_p,heap) *redirect_callers,
> @@ -2286,7 +2286,7 @@ cgraph_copy_node_for_versioning (struct
>
>    new_version = cgraph_create_node (new_decl);
>
> -   new_version->analyzed = true;
> +   new_version->analyzed = old_version->analyzed;

Hm?  analyzed means "with body"; surely you have a body if you clone.

>    new_version->local = old_version->local;
>    new_version->local.externally_visible = false;
>    new_version->local.local = true;
> @@ -2294,6 +2294,7 @@ cgraph_copy_node_for_versioning (struct
>    new_version->rtl = old_version->rtl;
>    new_version->reachable = true;
>    new_version->count = old_version->count;
> +   new_version->lowered = true;

OTOH this isn't necessarily true.  cgraph exists before lowering.

>    for (e = old_version->callees; e; e=e->next_callee)
>      if (!bbs_to_copy
> @@ -2389,7 +2390,6 @@ cgraph_function_versioning (struct cgrap
>   DECL_VIRTUAL_P (new_version_node->decl) = 0;
>   new_version_node->local.externally_visible = 0;
>   new_version_node->local.local = 1;
> -  new_version_node->lowered = true;
>
>   /* Update the call_expr on the edges to call the new version node. */
>   update_call_expr (new_version_node);
> Index: gcc/tree-ssa-alias.c
> ===================================================================
> --- gcc/tree-ssa-alias.c        (.../trunk)     (revision 180744)
> +++ gcc/tree-ssa-alias.c        (.../branches/transactional-memory)
> (revision 180773)
> @@ -1182,6 +1182,8 @@ ref_maybe_used_by_call_p_1 (gimple call,
>        case BUILT_IN_MEMPCPY:
>        case BUILT_IN_STPCPY:
>        case BUILT_IN_STPNCPY:
> +        case BUILT_IN_TM_MEMCPY:
> +        case BUILT_IN_TM_MEMMOVE:
>          {
>            ao_ref dref;
>            tree size = NULL_TREE;
> @@ -1228,6 +1230,32 @@ ref_maybe_used_by_call_p_1 (gimple call,
>                                           size);
>            return refs_may_alias_p_1 (&dref, ref, false);
>          }
> +
> +        /* The following functions read memory pointed to by their
> +          first argument.  */
> +       CASE_BUILT_IN_TM_LOAD (1):
> +       CASE_BUILT_IN_TM_LOAD (2):
> +       CASE_BUILT_IN_TM_LOAD (4):
> +       CASE_BUILT_IN_TM_LOAD (8):
> +        CASE_BUILT_IN_TM_LOAD (FLOAT):
> +       CASE_BUILT_IN_TM_LOAD (DOUBLE):
> +       CASE_BUILT_IN_TM_LOAD (LDOUBLE):
> +       CASE_BUILT_IN_TM_LOAD (M64):
> +       CASE_BUILT_IN_TM_LOAD (M128):
> +       CASE_BUILT_IN_TM_LOAD (M256):
> +        case BUILT_IN_TM_LOG:
> +        case BUILT_IN_TM_LOG_1:
> +        case BUILT_IN_TM_LOG_2:
> +        case BUILT_IN_TM_LOG_4:
> +        case BUILT_IN_TM_LOG_8:
> +        case BUILT_IN_TM_LOG_FLOAT:
> +        case BUILT_IN_TM_LOG_DOUBLE:
> +        case BUILT_IN_TM_LOG_LDOUBLE:
> +        case BUILT_IN_TM_LOG_M64:
> +        case BUILT_IN_TM_LOG_M128:
> +        case BUILT_IN_TM_LOG_M256:
> +         return ptr_deref_may_alias_ref_p_1 (gimple_call_arg (call, 0),
> ref);
> +
>        /* These read memory pointed to by the first argument.  */
>        case BUILT_IN_STRDUP:
>        case BUILT_IN_STRNDUP:
> @@ -1250,6 +1278,7 @@ ref_maybe_used_by_call_p_1 (gimple call,
>        case BUILT_IN_STACK_SAVE:
>        case BUILT_IN_STACK_RESTORE:
>        case BUILT_IN_MEMSET:
> +        case BUILT_IN_TM_MEMSET:
>        case BUILT_IN_MEMSET_CHK:
>        case BUILT_IN_FREXP:
>        case BUILT_IN_FREXPF:
> @@ -1480,6 +1509,19 @@ call_may_clobber_ref_p_1 (gimple call, a
>        case BUILT_IN_STRCAT:
>        case BUILT_IN_STRNCAT:
>        case BUILT_IN_MEMSET:
> +        case BUILT_IN_TM_MEMSET:
> +        CASE_BUILT_IN_TM_STORE (1):
> +        CASE_BUILT_IN_TM_STORE (2):
> +        CASE_BUILT_IN_TM_STORE (4):
> +        CASE_BUILT_IN_TM_STORE (8):
> +        CASE_BUILT_IN_TM_STORE (FLOAT):
> +        CASE_BUILT_IN_TM_STORE (DOUBLE):
> +        CASE_BUILT_IN_TM_STORE (LDOUBLE):
> +        CASE_BUILT_IN_TM_STORE (M64):
> +        CASE_BUILT_IN_TM_STORE (M128):
> +        CASE_BUILT_IN_TM_STORE (M256):
> +        case BUILT_IN_TM_MEMCPY:
> +        case BUILT_IN_TM_MEMMOVE:
>          {
>            ao_ref dref;
>            tree size = NULL_TREE;
> Index: gcc/ipa-inline.c
> ===================================================================
> --- gcc/ipa-inline.c    (.../trunk)     (revision 180744)
> +++ gcc/ipa-inline.c    (.../branches/transactional-memory)     (revision
> 180773)
> @@ -284,6 +284,15 @@ can_inline_edge_p (struct cgraph_edge *e
>       e->inline_failed = CIF_EH_PERSONALITY;
>       inlinable = false;
>     }
> +  /* TM pure functions should not get inlined if the outer function is
> +     a TM safe function.  */
> +  else if (flag_tm

Please move flag checks into the respective predicates.  Any reason
why the is_tm_pure () predicate wouldn't already do the correct thing
with !flag_tm?

> +          && is_tm_pure (callee->decl)
> +          && is_tm_safe (e->caller->decl))
> +    {
> +      e->inline_failed = CIF_UNSPECIFIED;
> +      inlinable = false;
> +    }
>   /* Don't inline if the callee can throw non-call exceptions but the
>      caller cannot.
>      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is
> missing.
> Index: gcc/crtstuff.c
> ===================================================================
> --- gcc/crtstuff.c      (.../trunk)     (revision 180744)
> +++ gcc/crtstuff.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -162,6 +162,9 @@ extern void __do_global_ctors_1 (void);
>  /* Likewise for _Jv_RegisterClasses.  */
>  extern void _Jv_RegisterClasses (void *) TARGET_ATTRIBUTE_WEAK;
>
> +extern void _ITM_registerTMCloneTable (void *, size_t)
> TARGET_ATTRIBUTE_WEAK;
> +extern void _ITM_deregisterTMCloneTable (void *) TARGET_ATTRIBUTE_WEAK;
> +
>  #ifdef OBJECT_FORMAT_ELF
>
>  /*  Declare a pointer to void function type.  */
> @@ -241,6 +244,11 @@ STATIC void *__JCR_LIST__[]
>   = { };
>  #endif /* JCR_SECTION_NAME */
>
> +STATIC func_ptr __TMC_LIST__[]
> +  __attribute__((unused, section(".tm_clone_table"),
> aligned(sizeof(void*))))
> +  = { };
> +extern func_ptr __TMC_END__[] __attribute__((__visibility__ ("hidden")));
> +
>  #if defined(INIT_SECTION_ASM_OP) || defined(INIT_ARRAY_SECTION_ASM_OP)
>
>  #ifdef OBJECT_FORMAT_ELF
> @@ -330,6 +338,13 @@ __do_global_dtors_aux (void)
>   }
>  #endif /* !defined(FINI_ARRAY_SECTION_ASM_OP) */
>
> +  if (_ITM_deregisterTMCloneTable)
> +    {
> +      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> +      if (size > 0)
> +       _ITM_deregisterTMCloneTable (__TMC_LIST__);
> +    }
> +
>  #ifdef USE_EH_FRAME_REGISTRY
>  #ifdef CRT_GET_RFIB_DATA
>   /* If we used the new __register_frame_info_bases interface,
> @@ -391,6 +406,12 @@ frame_dummy (void)
>        register_classes (__JCR_LIST__);
>     }
>  #endif /* JCR_SECTION_NAME */
> +  if (_ITM_registerTMCloneTable)
> +    {
> +      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> +      if (size > 0)
> +       _ITM_registerTMCloneTable (__TMC_LIST__, size);
> +    }
>  }
>
>  #ifdef INIT_SECTION_ASM_OP
> @@ -457,6 +478,13 @@ __do_global_dtors (void)
>   for (p = __DTOR_LIST__ + 1; (f = *p); p++)
>     f ();
>
> +  if (_ITM_deregisterTMCloneTable)
> +    {
> +      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
> +      if (size > 0)
> +       _ITM_deregisterTMCloneTable (__TMC_LIST__);
> +    }
> +
>  #ifdef USE_EH_FRAME_REGISTRY
>   if (__deregister_frame_info)
>     __deregister_frame_info (__EH_FRAME_BEGIN__);
> @@ -570,6 +598,11 @@ STATIC void *__JCR_END__[1]
>    = { 0 };
>  #endif /* JCR_SECTION_NAME */
>
> +func_ptr __TMC_END__[]
> +  __attribute__((unused, section(".tm_clone_table"), aligned(sizeof(void
> *)),
> +                __visibility__ ("hidden")))
> +  = { };
> +
>  #ifdef INIT_ARRAY_SECTION_ASM_OP
>
>  /* If we are using .init_array, there is nothing to do.  */
> Index: gcc/cfgbuild.c
> ===================================================================
> --- gcc/cfgbuild.c      (.../trunk)     (revision 180744)
> +++ gcc/cfgbuild.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -338,18 +338,30 @@ make_edges (basic_block min, basic_block
>          /* Add any appropriate EH edges.  */
>          rtl_make_eh_edge (edge_cache, bb, insn);
>
> -         if (code == CALL_INSN && nonlocal_goto_handler_labels)
> +         if (code == CALL_INSN)
>            {
> -             /* ??? This could be made smarter: in some cases it's possible
> -                to tell that certain calls will not do a nonlocal goto.
> -                For example, if the nested functions that do the nonlocal
> -                gotos do not have their addresses taken, then only calls to
> -                those functions or to other nested functions that use them
> -                could possibly do nonlocal gotos.  */
>              if (can_nonlocal_goto (insn))
> -               for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
> -                 make_label_edge (edge_cache, bb, XEXP (x, 0),
> -                                  EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> +               {
> +                 /* ??? This could be made smarter: in some cases it's
> +                    possible to tell that certain calls will not do a
> +                    nonlocal goto.  For example, if the nested functions
> +                    that do the nonlocal gotos do not have their addresses
> +                    taken, then only calls to those functions or to other
> +                    nested functions that use them could possibly do
> +                    nonlocal gotos.  */
> +                 for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
> +                   make_label_edge (edge_cache, bb, XEXP (x, 0),
> +                                    EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> +               }
> +
> +             if (flag_tm)
> +               {
> +                 rtx note;
> +                 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
> +                   if (REG_NOTE_KIND (note) == REG_TM)
> +                     make_label_edge (edge_cache, bb, XEXP (note, 0),
> +                                      EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
> +               }
>            }
>        }
>
> Index: gcc/timevar.def
> ===================================================================
> --- gcc/timevar.def     (.../trunk)     (revision 180744)
> +++ gcc/timevar.def     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -184,6 +184,7 @@ DEFTIMEVAR (TV_TREE_COPY_RENAME          , "
>  DEFTIMEVAR (TV_TREE_SSA_VERIFY       , "tree SSA verifier")
>  DEFTIMEVAR (TV_TREE_STMT_VERIFY      , "tree STMT verifier")
>  DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch initialization
> conversion")
> +DEFTIMEVAR (TV_TRANS_MEM             , "transactional memory")
>  DEFTIMEVAR (TV_TREE_STRLEN           , "tree strlen optimization")
>  DEFTIMEVAR (TV_CGRAPH_VERIFY         , "callgraph verifier")
>  DEFTIMEVAR (TV_DOM_FRONTIERS         , "dominance frontiers")
> Index: gcc/recog.c
> ===================================================================
> --- gcc/recog.c (.../trunk)     (revision 180744)
> +++ gcc/recog.c (.../branches/transactional-memory)     (revision 180773)
> @@ -3287,6 +3287,7 @@ peep2_attempt (basic_block bb, rtx insn,
>          {
>          case REG_NORETURN:
>          case REG_SETJMP:
> +         case REG_TM:
>            add_reg_note (new_insn, REG_NOTE_KIND (note),
>                          XEXP (note, 0));
>            break;
> Index: gcc/function.h
> ===================================================================
> --- gcc/function.h      (.../trunk)     (revision 180744)
> +++ gcc/function.h      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -467,6 +467,14 @@ extern GTY(()) struct rtl_data x_rtl;
>    want to do differently.  */
>  #define crtl (&x_rtl)
>
> +/* This structure is used to map a gimple statement to a label,
> +   or list of labels to represent transaction restart.  */
> +
> +struct GTY(()) tm_restart_node {
> +  gimple stmt;
> +  tree label_or_list;
> +};
> +
>  struct GTY(()) stack_usage
>  {
>   /* # of bytes of static stack space allocated by the function.  */
> @@ -518,6 +526,10 @@ struct GTY(()) function {
>   /* Value histograms attached to particular statements.  */
>   htab_t GTY((skip)) value_histograms;
>
> +  /* Map gimple stmt to tree label (or list of labels) for transaction
> +     restart and abort.  */
> +  htab_t GTY ((param_is (struct tm_restart_node))) tm_restart;
> +

As this maps 'gimple' to tree shouldn't this go to fn->gimple_df instead?
That way you avoid growing generic struct function.  Or into eh_status,
if that looks like a better fit.

>   /* For function.c.  */
>
>   /* Points to the FUNCTION_DECL of this function.  */
> Index: gcc/emit-rtl.c
> ===================================================================
> --- gcc/emit-rtl.c      (.../trunk)     (revision 180744)
> +++ gcc/emit-rtl.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -3595,6 +3595,7 @@ try_split (rtx pat, rtx trial, int last)
>
>        case REG_NORETURN:
>        case REG_SETJMP:
> +       case REG_TM:
>          for (insn = insn_last; insn != NULL_RTX; insn = PREV_INSN (insn))
>            {
>              if (CALL_P (insn))
> Index: gcc/cfgexpand.c
> ===================================================================
> --- gcc/cfgexpand.c     (.../trunk)     (revision 180744)
> +++ gcc/cfgexpand.c     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -2096,6 +2096,32 @@ expand_gimple_stmt (gimple stmt)
>        }
>     }
>
> +  /* Mark all calls that can have a transaction restart.  */

Why isn't this done when we expand the call?  This walking of the
RTL sequence looks like a hack (an easy one, albeit).

> +  if (cfun->tm_restart && is_gimple_call (stmt))
> +    {
> +      struct tm_restart_node dummy;
> +      void **slot;
> +
> +      dummy.stmt = stmt;
> +      slot = htab_find_slot (cfun->tm_restart, &dummy, NO_INSERT);
> +      if (slot)
> +       {
> +         struct tm_restart_node *n = (struct tm_restart_node *) *slot;
> +         tree list = n->label_or_list;
> +         rtx insn;
> +
> +         for (insn = next_real_insn (last); !CALL_P (insn);
> +              insn = next_real_insn (insn))
> +           continue;
> +
> +         if (TREE_CODE (list) == LABEL_DECL)
> +           add_reg_note (insn, REG_TM, label_rtx (list));
> +         else
> +           for (; list ; list = TREE_CHAIN (list))
> +             add_reg_note (insn, REG_TM, label_rtx (TREE_VALUE (list)));
> +       }
> +    }
> +
>   return last;
>  }
>
> @@ -4455,6 +4481,10 @@ gimple_expand_cfg (void)
>   /* After expanding, the return labels are no longer needed. */
>   return_label = NULL;
>   naked_return_label = NULL;
> +
> +  /* After expanding, the tm_restart map is no longer needed.  */
> +  cfun->tm_restart = NULL;

You should still free it, to not confuse the statistics code I think.

> +
>   /* Tag the blocks with a depth number so that change_scope can find
>      the common parent easily.  */
>   set_block_levels (DECL_INITIAL (cfun->decl), 0);
> Index: gcc/varasm.c
> ===================================================================
> --- gcc/varasm.c        (.../trunk)     (revision 180744)
> +++ gcc/varasm.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -5859,6 +5859,103 @@ assemble_alias (tree decl, tree target)
>     }
>  }
>
> +/* Record and output a table of translations from original function
> +   to its transaction aware clone.  Note that tm_pure functions are
> +   considered to be their own clone.  */
> +
> +static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
> +     htab_t tm_clone_pairs;
> +
> +void
> +record_tm_clone_pair (tree o, tree n)
> +{
> +  struct tree_map **slot, *h;
> +
> +  if (tm_clone_pairs == NULL)
> +    tm_clone_pairs = htab_create_ggc (32, tree_map_hash, tree_map_eq, 0);
> +
> +  h = ggc_alloc_tree_map ();
> +  h->hash = htab_hash_pointer (o);
> +  h->base.from = o;
> +  h->to = n;
> +
> +  slot = (struct tree_map **)
> +    htab_find_slot_with_hash (tm_clone_pairs, h, h->hash, INSERT);
> +  *slot = h;
> +}
> +
> +tree
> +get_tm_clone_pair (tree o)
> +{
> +  if (tm_clone_pairs)
> +    {
> +      struct tree_map *h, in;
> +
> +      in.base.from = o;
> +      in.hash = htab_hash_pointer (o);
> +      h = (struct tree_map *) htab_find_with_hash (tm_clone_pairs,
> +                                                  &in, in.hash);
> +      if (h)
> +       return h->to;
> +    }
> +  return NULL_TREE;
> +}
> +
> +/* Helper function for finish_tm_clone_pairs.  Dump the clone table.  */
> +
> +int
> +finish_tm_clone_pairs_1 (void **slot, void *info ATTRIBUTE_UNUSED)
> +{
> +  struct tree_map *map = (struct tree_map *) *slot;
> +  bool *switched = (bool *) info;
> +  tree src = map->base.from;
> +  tree dst = map->to;
> +  struct cgraph_node *src_n = cgraph_get_node (src);
> +  struct cgraph_node *dst_n = cgraph_get_node (dst);
> +
> +  /* The function ipa_tm_create_version() marks the clone as needed if
> +     the original function was needed.  But we also mark the clone as
> +     needed if we ever called the clone indirectly through
> +     TM_GETTMCLONE.  If neither of these are true, we didn't generate
> +     a clone, and we didn't call it indirectly... no sense keeping it
> +     in the clone table.  */
> +  if (!dst_n || !dst_n->needed)
> +    return 1;
> +
> +  /* This covers the case where we have optimized the original
> +     function away, and only access the transactional clone.  */
> +  if (!src_n || !src_n->needed)
> +    return 1;
> +
> +  if (!*switched)
> +    {
> +      switch_to_section (get_named_section (NULL, ".tm_clone_table", 3));
> +      assemble_align (POINTER_SIZE);
> +      *switched = true;
> +    }
> +
> +  assemble_integer (XEXP (DECL_RTL (src), 0),
> +                   POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
> +  assemble_integer (XEXP (DECL_RTL (dst), 0),
> +                   POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
> +  return 1;
> +}
> +
> +void
> +finish_tm_clone_pairs (void)
> +{
> +  bool switched = false;
> +
> +  if (tm_clone_pairs == NULL)
> +    return;
> +
> +  htab_traverse_noresize (tm_clone_pairs, finish_tm_clone_pairs_1,
> +                         (void *) &switched);

This makes the generated table dependent on memory layout.  You
need to walk the pairs in some deterministic order.  In fact why not
walk all cgraph_nodes looking for the pairs - they should be still
in the list of clones for a node and you've marked it with DECL_TM_CLONE.
You can then sort them by cgraph node uid.

Did you check bootstrapping GCC with TM enabled and address-space
randomization turned on?

> +  htab_delete (tm_clone_pairs);
> +  tm_clone_pairs = NULL;
> +}
> +
> +
>  /* Emit an assembler directive to set symbol for DECL visibility to
>    the visibility type VIS, which must not be VISIBILITY_DEFAULT.  */
>
> Index: gcc/output.h
> ===================================================================
> --- gcc/output.h        (.../trunk)     (revision 180744)
> +++ gcc/output.h        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -606,6 +606,11 @@ extern bool unlikely_text_section_p (sec
>  extern void switch_to_section (section *);
>  extern void output_section_asm_op (const void *);
>
> +extern void record_tm_clone_pair (tree, tree);
> +extern void finish_tm_clone_pairs (void);
> +extern int finish_tm_clone_pairs_1 (void **, void *);
> +extern tree get_tm_clone_pair (tree);
> +
>  extern void default_asm_output_source_filename (FILE *, const char *);
>  extern void output_file_directive (FILE *, const char *);
>
> Index: gcc/combine.c
> ===================================================================
> --- gcc/combine.c       (.../trunk)     (revision 180744)
> +++ gcc/combine.c       (.../branches/transactional-memory)     (revision
> 180773)
> @@ -13286,6 +13286,7 @@ distribute_notes (rtx notes, rtx from_in
>
>        case REG_NORETURN:
>        case REG_SETJMP:
> +       case REG_TM:
>          /* These notes must remain with the call.  It should not be
>             possible for both I2 and I3 to be a call.  */
>          if (CALL_P (i3))
> Index: gcc/tree-flow.h
> ===================================================================
> --- gcc/tree-flow.h     (.../trunk)     (revision 180744)
> +++ gcc/tree-flow.h     (.../branches/transactional-memory)     (revision
> 180773)
> @@ -778,6 +778,9 @@ extern bool maybe_duplicate_eh_stmt (gim
>  extern bool verify_eh_edges (gimple);
>  extern bool verify_eh_dispatch_edge (gimple);
>
> +/* In gtm-low.c  */
> +extern bool is_transactional_stmt (const_gimple);
> +

gimple.h please.  Looks like a gimple predicate as well, so the implementation
should be in gimple.c?

>  /* In tree-ssa-pre.c  */
>  struct pre_expr_d;
>  void add_to_value (unsigned int, struct pre_expr_d *);
> Index: gcc/tree-ssa-structalias.c
> ===================================================================
> --- gcc/tree-ssa-structalias.c  (.../trunk)     (revision 180744)
> +++ gcc/tree-ssa-structalias.c  (.../branches/transactional-memory)
> (revision 180773)
> @@ -4024,6 +4024,8 @@ find_func_aliases_for_builtin_call (gimp
>       case BUILT_IN_STPCPY_CHK:
>       case BUILT_IN_STRCAT_CHK:
>       case BUILT_IN_STRNCAT_CHK:
> +      case BUILT_IN_TM_MEMCPY:
> +      case BUILT_IN_TM_MEMMOVE:
>        {
>          tree res = gimple_call_lhs (t);
>          tree dest = gimple_call_arg (t, (DECL_FUNCTION_CODE (fndecl)
> @@ -4056,6 +4058,7 @@ find_func_aliases_for_builtin_call (gimp
>        }
>       case BUILT_IN_MEMSET:
>       case BUILT_IN_MEMSET_CHK:
> +      case BUILT_IN_TM_MEMSET:
>        {
>          tree res = gimple_call_lhs (t);
>          tree dest = gimple_call_arg (t, 0);
> @@ -4197,6 +4200,50 @@ find_func_aliases_for_builtin_call (gimp
>            }
>          return true;
>        }
> +      CASE_BUILT_IN_TM_STORE (1):
> +      CASE_BUILT_IN_TM_STORE (2):
> +      CASE_BUILT_IN_TM_STORE (4):
> +      CASE_BUILT_IN_TM_STORE (8):
> +      CASE_BUILT_IN_TM_STORE (FLOAT):
> +      CASE_BUILT_IN_TM_STORE (DOUBLE):
> +      CASE_BUILT_IN_TM_STORE (LDOUBLE):
> +      CASE_BUILT_IN_TM_STORE (M64):
> +      CASE_BUILT_IN_TM_STORE (M128):
> +      CASE_BUILT_IN_TM_STORE (M256):
> +       {
> +         tree addr = gimple_call_arg (t, 0);
> +         tree src = gimple_call_arg (t, 1);
> +
> +         get_constraint_for (addr, &lhsc);
> +         do_deref (&lhsc);
> +         get_constraint_for (src, &rhsc);
> +         process_all_all_constraints (lhsc, rhsc);
> +         VEC_free (ce_s, heap, lhsc);
> +         VEC_free (ce_s, heap, rhsc);
> +         return true;
> +       }
> +      CASE_BUILT_IN_TM_LOAD (1):
> +      CASE_BUILT_IN_TM_LOAD (2):
> +      CASE_BUILT_IN_TM_LOAD (4):
> +      CASE_BUILT_IN_TM_LOAD (8):
> +      CASE_BUILT_IN_TM_LOAD (FLOAT):
> +      CASE_BUILT_IN_TM_LOAD (DOUBLE):
> +      CASE_BUILT_IN_TM_LOAD (LDOUBLE):
> +      CASE_BUILT_IN_TM_LOAD (M64):
> +      CASE_BUILT_IN_TM_LOAD (M128):
> +      CASE_BUILT_IN_TM_LOAD (M256):
> +        {
> +         tree dest = gimple_call_lhs (t);
> +         tree addr = gimple_call_arg (t, 0);
> +
> +         get_constraint_for (dest, &lhsc);
> +         get_constraint_for (addr, &rhsc);
> +         do_deref (&rhsc);
> +         process_all_all_constraints (lhsc, rhsc);
> +         VEC_free (ce_s, heap, lhsc);
> +         VEC_free (ce_s, heap, rhsc);
> +         return true;
> +        }
>       /* Variadic argument handling needs to be handled in IPA
>         mode as well.  */
>       case BUILT_IN_VA_START:
> Index: gcc/tree-cfg.c
> ===================================================================
> --- gcc/tree-cfg.c      (.../trunk)     (revision 180744)
> +++ gcc/tree-cfg.c      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -666,6 +666,15 @@ make_edges (void)
>                }
>              break;
>
> +           case GIMPLE_TRANSACTION:
> +             {
> +               tree abort_label = gimple_transaction_label (last);
> +               if (abort_label)
> +                 make_edge (bb, label_to_block (abort_label), 0);
> +               fallthru = true;
> +             }
> +             break;
> +
>            default:
>              gcc_assert (!stmt_ends_bb_p (last));
>              fallthru = true;
> @@ -1196,22 +1205,30 @@ cleanup_dead_labels (void)
>   FOR_EACH_BB (bb)
>     {
>       gimple stmt = last_stmt (bb);
> +      tree label, new_label;
> +
>       if (!stmt)
>        continue;
>
>       switch (gimple_code (stmt))
>        {
>        case GIMPLE_COND:
> -         {
> -           tree true_label = gimple_cond_true_label (stmt);
> -           tree false_label = gimple_cond_false_label (stmt);
> +         label = gimple_cond_true_label (stmt);
> +         if (label)
> +           {
> +             new_label = main_block_label (label);
> +             if (new_label != label)
> +               gimple_cond_set_true_label (stmt, new_label);
> +           }
>
> -           if (true_label)
> -             gimple_cond_set_true_label (stmt, main_block_label
> (true_label));
> -           if (false_label)
> -             gimple_cond_set_false_label (stmt, main_block_label
> (false_label));
> -           break;
> -         }
> +         label = gimple_cond_false_label (stmt);
> +         if (label)
> +           {
> +             new_label = main_block_label (label);
> +             if (new_label != label)
> +               gimple_cond_set_false_label (stmt, new_label);
> +           }
> +         break;
>
>        case GIMPLE_SWITCH:
>          {
> @@ -1221,8 +1238,10 @@ cleanup_dead_labels (void)
>            for (i = 0; i < n; ++i)
>              {
>                tree case_label = gimple_switch_label (stmt, i);
> -               tree label = main_block_label (CASE_LABEL (case_label));
> -               CASE_LABEL (case_label) = label;
> +               label = CASE_LABEL (case_label);
> +               new_label = main_block_label (label);
> +               if (new_label != label)
> +                 CASE_LABEL (case_label) = new_label;
>              }
>            break;
>          }
> @@ -1243,13 +1262,27 @@ cleanup_dead_labels (void)
>        /* We have to handle gotos until they're removed, and we don't
>           remove them until after we've created the CFG edges.  */
>        case GIMPLE_GOTO:
> -          if (!computed_goto_p (stmt))
> +         if (!computed_goto_p (stmt))
>            {
> -             tree new_dest = main_block_label (gimple_goto_dest (stmt));
> -             gimple_goto_set_dest (stmt, new_dest);
> +             label = gimple_goto_dest (stmt);
> +             new_label = main_block_label (label);
> +             if (new_label != label)
> +               gimple_goto_set_dest (stmt, new_label);

What's the reason for these changes?  Optimization?

>            }
>          break;
>
> +       case GIMPLE_TRANSACTION:
> +         {
> +           tree label = gimple_transaction_label (stmt);
> +           if (label)
> +             {
> +               tree new_label = main_block_label (label);
> +               if (new_label != label)
> +                 gimple_transaction_set_label (stmt, new_label);
> +             }
> +         }
> +         break;
> +
>        default:
>          break;
>       }
> @@ -2263,6 +2296,13 @@ is_ctrl_altering_stmt (gimple t)
>        if (flags & ECF_NORETURN)
>          return true;
>
> +       /* TM ending statements have backedges out of the transaction.
> +          Return true so we split the basic block containing
> +          them.  */
> +       if ((flags & ECF_TM_OPS)
> +           && is_tm_ending_fndecl (gimple_call_fndecl (t)))
> +         return true;
> +
>        /* BUILT_IN_RETURN call is same as return statement.  */
>        if (gimple_call_builtin_p (t, BUILT_IN_RETURN))
>          return true;
> @@ -2284,6 +2324,10 @@ is_ctrl_altering_stmt (gimple t)
>       /* OpenMP directives alter control flow.  */
>       return true;
>
> +    case GIMPLE_TRANSACTION:
> +      /* A transaction start alters control flow.  */
> +      return true;
> +
>     default:
>       break;
>     }
> @@ -4054,6 +4098,17 @@ verify_gimple_switch (gimple stmt)
>   return false;
>  }
>
> +/* Verify the contents of a GIMPLE_TRANSACTION.  Returns true if there
> +   is a problem, otherwise false.  */
> +
> +static bool
> +verify_gimple_transaction (gimple stmt)
> +{
> +  tree lab = gimple_transaction_label (stmt);
> +  if (lab != NULL && TREE_CODE (lab) != LABEL_DECL)
> +    return true;

I seem to recall that this has substatements, so you should handle it in
verify_gimple_in_seq_2 and make sure to verify those substatements.

> +  return false;
> +}
>
>  /* Verify a gimple debug statement STMT.
>    Returns true if anything is wrong.  */
> @@ -4155,6 +4210,9 @@ verify_gimple_stmt (gimple stmt)
>     case GIMPLE_ASM:
>       return false;
>
> +    case GIMPLE_TRANSACTION:
> +      return verify_gimple_transaction (stmt);
> +

Not here.

>     /* Tuples that do not have tree operands.  */
>     case GIMPLE_NOP:
>     case GIMPLE_PREDICT:
> @@ -4271,10 +4329,19 @@ verify_gimple_in_seq_2 (gimple_seq stmts
>          err |= verify_gimple_in_seq_2 (gimple_eh_filter_failure (stmt));
>          break;
>
> +       case GIMPLE_EH_ELSE:
> +         err |= verify_gimple_in_seq_2 (gimple_eh_else_n_body (stmt));
> +         err |= verify_gimple_in_seq_2 (gimple_eh_else_e_body (stmt));
> +         break;
> +
>        case GIMPLE_CATCH:
>          err |= verify_gimple_in_seq_2 (gimple_catch_handler (stmt));
>          break;
>
> +       case GIMPLE_TRANSACTION:
> +         err |= verify_gimple_in_seq_2 (gimple_transaction_body (stmt));
> +         break;
> +

Ah, you do.  But you'll never call your label verification code.

>        default:
>          {
>            bool err2 = verify_gimple_stmt (stmt);
> @@ -5052,6 +5119,14 @@ gimple_redirect_edge_and_branch (edge e,
>        redirect_eh_dispatch_edge (stmt, e, dest);
>       break;
>
> +    case GIMPLE_TRANSACTION:
> +      /* The ABORT edge has a stored label associated with it, otherwise
> +        the edges are simply redirectable.  */
> +      /* ??? We don't really need this label after the cfg is created.  */
> +      if (e->flags == 0)
> +       gimple_transaction_set_label (stmt, gimple_block_label (dest));

So why set it (and thus keep it live)?

> +      break;
> +
>     default:
>       /* Otherwise it must be a fallthru edge, and we don't need to
>         do anything besides redirecting it.  */
> @@ -6428,8 +6503,10 @@ dump_function_to_file (tree fn, FILE *fi
>   bool ignore_topmost_bind = false, any_var = false;
>   basic_block bb;
>   tree chain;
> +  bool tmclone = TREE_CODE (fn) == FUNCTION_DECL && DECL_IS_TM_CLONE (fn);
>
> -  fprintf (file, "%s (", lang_hooks.decl_printable_name (fn, 2));
> +  fprintf (file, "%s %s(", lang_hooks.decl_printable_name (fn, 2),
> +          tmclone ? "[tm-clone] " : "");
>
>   arg = DECL_ARGUMENTS (fn);
>   while (arg)
> Index: gcc/passes.c
> ===================================================================
> --- gcc/passes.c        (.../trunk)     (revision 180744)
> +++ gcc/passes.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -1174,9 +1174,11 @@ init_optimization_passes (void)
>   p = &all_lowering_passes;
>   NEXT_PASS (pass_warn_unused_result);
>   NEXT_PASS (pass_diagnose_omp_blocks);
> +  NEXT_PASS (pass_diagnose_tm_blocks);
>   NEXT_PASS (pass_mudflap_1);
>   NEXT_PASS (pass_lower_omp);
>   NEXT_PASS (pass_lower_cf);
> +  NEXT_PASS (pass_lower_tm);
>   NEXT_PASS (pass_refactor_eh);
>   NEXT_PASS (pass_lower_eh);
>   NEXT_PASS (pass_build_cfg);
> @@ -1241,6 +1243,7 @@ init_optimization_passes (void)
>     }
>   NEXT_PASS (pass_ipa_increase_alignment);
>   NEXT_PASS (pass_ipa_matrix_reorg);
> +  NEXT_PASS (pass_ipa_tm);
>   NEXT_PASS (pass_ipa_lower_emutls);
>   *p = NULL;
>
> @@ -1400,6 +1403,13 @@ init_optimization_passes (void)
>       NEXT_PASS (pass_uncprop);
>       NEXT_PASS (pass_local_pure_const);
>     }
> +  NEXT_PASS (pass_tm_init);
> +    {
> +      struct opt_pass **p = &pass_tm_init.pass.sub;
> +      NEXT_PASS (pass_tm_mark);
> +      NEXT_PASS (pass_tm_memopt);
> +      NEXT_PASS (pass_tm_edges);
> +    }
>   NEXT_PASS (pass_lower_complex_O0);
>   NEXT_PASS (pass_cleanup_eh);
>   NEXT_PASS (pass_lower_resx);
> Index: gcc/reg-notes.def
> ===================================================================
> --- gcc/reg-notes.def   (.../trunk)     (revision 180744)
> +++ gcc/reg-notes.def   (.../branches/transactional-memory)     (revision
> 180773)
> @@ -203,6 +203,11 @@ REG_NOTE (CROSSING_JUMP)
>    functions that can return twice.  */
>  REG_NOTE (SETJMP)
>
> +/* This kind of note is generated at each transactional memory
> +   builtin, to indicate we need to generate transaction restart
> +   edges for this insn.  */
> +REG_NOTE (TM)
> +
>  /* Indicates the cumulative offset of the stack pointer accounting
>    for pushed arguments.  This will only be generated when
>    ACCUMULATE_OUTGOING_ARGS is false.  */
> Index: gcc/cfgrtl.c
> ===================================================================
> --- gcc/cfgrtl.c        (.../trunk)     (revision 180744)
> +++ gcc/cfgrtl.c        (.../branches/transactional-memory)     (revision
> 180773)
> @@ -2246,6 +2246,8 @@ purge_dead_edges (basic_block bb)
>            ;
>          else if ((e->flags & EDGE_EH) && can_throw_internal (insn))
>            ;
> +         else if (flag_tm && find_reg_note (insn, REG_TM, NULL))
> +           ;
>          else
>            remove = true;
>        }
> Index: gcc/params.def
> ===================================================================
> --- gcc/params.def      (.../trunk)     (revision 180744)
> +++ gcc/params.def      (.../branches/transactional-memory)     (revision
> 180773)
> @@ -872,6 +872,13 @@ DEFPARAM (PARAM_IPA_SRA_PTR_GROWTH_FACTO
>          "a pointer to an aggregate with",
>          2, 0, 0)
>
> +DEFPARAM (PARAM_TM_MAX_AGGREGATE_SIZE,
> +         "tm-max-aggregate-size",
> +         "Size in bytes after which thread-local aggregates should be "
> +         "instrumented with the logging functions instead of save/restore "
> +         "pairs",
> +         9, 0, 0)
> +
>  DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE,
>          "ipa-cp-value-list-size",
>          "Maximum size of a list of values associated with each parameter
> for "
>

Patch hide | download patch | download mbox

Index: gcc/cgraph.h
===================================================================
--- gcc/cgraph.h	(.../trunk)	(revision 180744)
+++ gcc/cgraph.h	(.../branches/transactional-memory)	(revision 180773)
@@ -98,6 +98,9 @@  struct GTY(()) cgraph_local_info {
    /* True when the function has been originally extern inline, but it is
       redefined now.  */
    unsigned redefined_extern_inline : 1;
+
+  /* True if the function may enter serial irrevocable mode.  */
+  unsigned tm_may_enter_irr : 1;
  };

  /* Information about the function that needs to be computed globally
@@ -565,6 +568,8 @@  void verify_cgraph_node (struct cgraph_n
  void cgraph_build_static_cdtor (char which, tree body, int priority);
  void cgraph_reset_static_var_maps (void);
  void init_cgraph (void);
+struct cgraph_node * cgraph_copy_node_for_versioning (struct cgraph_node *,
+		tree, VEC(cgraph_edge_p,heap)*, bitmap);
  struct cgraph_node *cgraph_function_versioning (struct cgraph_node *,
  						VEC(cgraph_edge_p,heap)*,
  						VEC(ipa_replace_map_p,gc)*,
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h	(.../trunk)	(revision 180744)
+++ gcc/tree-pass.h	(.../branches/transactional-memory)	(revision 180773)
@@ -447,6 +447,12 @@  extern struct gimple_opt_pass pass_build
  extern struct gimple_opt_pass pass_local_pure_const;
  extern struct gimple_opt_pass pass_tracer;
  extern struct gimple_opt_pass pass_warn_unused_result;
+extern struct gimple_opt_pass pass_diagnose_tm_blocks;
+extern struct gimple_opt_pass pass_lower_tm;
+extern struct gimple_opt_pass pass_tm_init;
+extern struct gimple_opt_pass pass_tm_mark;
+extern struct gimple_opt_pass pass_tm_memopt;
+extern struct gimple_opt_pass pass_tm_edges;
  extern struct gimple_opt_pass pass_split_functions;
  extern struct gimple_opt_pass pass_feedback_split_functions;

@@ -469,6 +475,7 @@  extern struct ipa_opt_pass_d pass_ipa_pu
  extern struct simple_ipa_opt_pass pass_ipa_pta;
  extern struct ipa_opt_pass_d pass_ipa_lto_wpa_fixup;
  extern struct ipa_opt_pass_d pass_ipa_lto_finish_out;
+extern struct simple_ipa_opt_pass pass_ipa_tm;
  extern struct ipa_opt_pass_d pass_ipa_profile;
  extern struct ipa_opt_pass_d pass_ipa_cdtor_merge;

Index: gcc/rtlanal.c
===================================================================
--- gcc/rtlanal.c	(.../trunk)	(revision 180744)
+++ gcc/rtlanal.c	(.../branches/transactional-memory)	(revision 180773)
@@ -1918,6 +1918,7 @@  alloc_reg_note (enum reg_note kind, rtx
      case REG_CC_USER:
      case REG_LABEL_TARGET:
      case REG_LABEL_OPERAND:
+    case REG_TM:
        /* These types of register notes use an INSN_LIST rather than an
  	 EXPR_LIST, so that copying is done right and dumps look
  	 better.  */
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(.../trunk)	(revision 180744)
+++ gcc/omp-low.c	(.../branches/transactional-memory)	(revision 180773)
@@ -139,6 +139,7 @@  static tree scan_omp_1_op (tree *, int *
      case GIMPLE_TRY: \
      case GIMPLE_CATCH: \
      case GIMPLE_EH_FILTER: \
+    case GIMPLE_TRANSACTION: \
        /* The sub-statements for these should be walked.  */ \
        *handled_ops_p = false; \
        break;
Index: gcc/toplev.c
===================================================================
--- gcc/toplev.c	(.../trunk)	(revision 180744)
+++ gcc/toplev.c	(.../branches/transactional-memory)	(revision 180773)
@@ -599,6 +599,7 @@  compile_file (void)

        output_shared_constant_pool ();
        output_object_blocks ();
+  finish_tm_clone_pairs ();

        /* Write out any pending weak symbol declarations.  */
        weak_finish ();
Index: gcc/cgraphunit.c
===================================================================
--- gcc/cgraphunit.c	(.../trunk)	(revision 180744)
+++ gcc/cgraphunit.c	(.../branches/transactional-memory)	(revision 180773)
@@ -2272,7 +2272,7 @@  update_call_expr (struct cgraph_node *ne
     was copied to prevent duplications of calls that are dead
     in the clone.  */

-static struct cgraph_node *
+struct cgraph_node *
  cgraph_copy_node_for_versioning (struct cgraph_node *old_version,
  				 tree new_decl,
  				 VEC(cgraph_edge_p,heap) *redirect_callers,
@@ -2286,7 +2286,7 @@  cgraph_copy_node_for_versioning (struct

     new_version = cgraph_create_node (new_decl);

-   new_version->analyzed = true;
+   new_version->analyzed = old_version->analyzed;
     new_version->local = old_version->local;
     new_version->local.externally_visible = false;
     new_version->local.local = true;
@@ -2294,6 +2294,7 @@  cgraph_copy_node_for_versioning (struct
     new_version->rtl = old_version->rtl;
     new_version->reachable = true;
     new_version->count = old_version->count;
+   new_version->lowered = true;

     for (e = old_version->callees; e; e=e->next_callee)
       if (!bbs_to_copy
@@ -2389,7 +2390,6 @@  cgraph_function_versioning (struct cgrap
    DECL_VIRTUAL_P (new_version_node->decl) = 0;
    new_version_node->local.externally_visible = 0;
    new_version_node->local.local = 1;
-  new_version_node->lowered = true;

    /* Update the call_expr on the edges to call the new version node. */
    update_call_expr (new_version_node);
Index: gcc/tree-ssa-alias.c
===================================================================
--- gcc/tree-ssa-alias.c	(.../trunk)	(revision 180744)
+++ gcc/tree-ssa-alias.c	(.../branches/transactional-memory)	(revision 
180773)
@@ -1182,6 +1182,8 @@  ref_maybe_used_by_call_p_1 (gimple call,
  	case BUILT_IN_MEMPCPY:
  	case BUILT_IN_STPCPY:
  	case BUILT_IN_STPNCPY:
+        case BUILT_IN_TM_MEMCPY:
+        case BUILT_IN_TM_MEMMOVE:
  	  {
  	    ao_ref dref;
  	    tree size = NULL_TREE;
@@ -1228,6 +1230,32 @@  ref_maybe_used_by_call_p_1 (gimple call,
  					   size);
  	    return refs_may_alias_p_1 (&dref, ref, false);
  	  }
+
+        /* The following functions read memory pointed to by their
+	   first argument.  */
+	CASE_BUILT_IN_TM_LOAD (1):
+	CASE_BUILT_IN_TM_LOAD (2):
+	CASE_BUILT_IN_TM_LOAD (4):
+	CASE_BUILT_IN_TM_LOAD (8):
+        CASE_BUILT_IN_TM_LOAD (FLOAT):
+	CASE_BUILT_IN_TM_LOAD (DOUBLE):
+	CASE_BUILT_IN_TM_LOAD (LDOUBLE):
+	CASE_BUILT_IN_TM_LOAD (M64):
+	CASE_BUILT_IN_TM_LOAD (M128):
+	CASE_BUILT_IN_TM_LOAD (M256):
+        case BUILT_IN_TM_LOG:
+        case BUILT_IN_TM_LOG_1:
+        case BUILT_IN_TM_LOG_2:
+        case BUILT_IN_TM_LOG_4:
+        case BUILT_IN_TM_LOG_8:
+        case BUILT_IN_TM_LOG_FLOAT:
+        case BUILT_IN_TM_LOG_DOUBLE:
+        case BUILT_IN_TM_LOG_LDOUBLE:
+        case BUILT_IN_TM_LOG_M64:
+        case BUILT_IN_TM_LOG_M128:
+        case BUILT_IN_TM_LOG_M256:
+	  return ptr_deref_may_alias_ref_p_1 (gimple_call_arg (call, 0), ref);
+
  	/* These read memory pointed to by the first argument.  */
  	case BUILT_IN_STRDUP:
  	case BUILT_IN_STRNDUP:
@@ -1250,6 +1278,7 @@  ref_maybe_used_by_call_p_1 (gimple call,
  	case BUILT_IN_STACK_SAVE:
  	case BUILT_IN_STACK_RESTORE:
  	case BUILT_IN_MEMSET:
+        case BUILT_IN_TM_MEMSET:
  	case BUILT_IN_MEMSET_CHK:
  	case BUILT_IN_FREXP:
  	case BUILT_IN_FREXPF:
@@ -1480,6 +1509,19 @@  call_may_clobber_ref_p_1 (gimple call, a
  	case BUILT_IN_STRCAT:
  	case BUILT_IN_STRNCAT:
  	case BUILT_IN_MEMSET:
+        case BUILT_IN_TM_MEMSET:
+        CASE_BUILT_IN_TM_STORE (1):
+        CASE_BUILT_IN_TM_STORE (2):
+        CASE_BUILT_IN_TM_STORE (4):
+        CASE_BUILT_IN_TM_STORE (8):
+        CASE_BUILT_IN_TM_STORE (FLOAT):
+        CASE_BUILT_IN_TM_STORE (DOUBLE):
+        CASE_BUILT_IN_TM_STORE (LDOUBLE):
+        CASE_BUILT_IN_TM_STORE (M64):
+        CASE_BUILT_IN_TM_STORE (M128):
+        CASE_BUILT_IN_TM_STORE (M256):
+        case BUILT_IN_TM_MEMCPY:
+        case BUILT_IN_TM_MEMMOVE:
  	  {
  	    ao_ref dref;
  	    tree size = NULL_TREE;
Index: gcc/ipa-inline.c
===================================================================
--- gcc/ipa-inline.c	(.../trunk)	(revision 180744)
+++ gcc/ipa-inline.c	(.../branches/transactional-memory)	(revision 180773)
@@ -284,6 +284,15 @@  can_inline_edge_p (struct cgraph_edge *e
        e->inline_failed = CIF_EH_PERSONALITY;
        inlinable = false;
      }
+  /* TM pure functions should not get inlined if the outer function is
+     a TM safe function.  */
+  else if (flag_tm
+	   && is_tm_pure (callee->decl)
+	   && is_tm_safe (e->caller->decl))
+    {
+      e->inline_failed = CIF_UNSPECIFIED;
+      inlinable = false;
+    }
    /* Don't inline if the callee can throw non-call exceptions but the
       caller cannot.
       FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is 
missing.
Index: gcc/crtstuff.c
===================================================================
--- gcc/crtstuff.c	(.../trunk)	(revision 180744)
+++ gcc/crtstuff.c	(.../branches/transactional-memory)	(revision 180773)
@@ -162,6 +162,9 @@  extern void __do_global_ctors_1 (void);
  /* Likewise for _Jv_RegisterClasses.  */
  extern void _Jv_RegisterClasses (void *) TARGET_ATTRIBUTE_WEAK;

+extern void _ITM_registerTMCloneTable (void *, size_t) 
TARGET_ATTRIBUTE_WEAK;
+extern void _ITM_deregisterTMCloneTable (void *) TARGET_ATTRIBUTE_WEAK;
+
  #ifdef OBJECT_FORMAT_ELF

  /*  Declare a pointer to void function type.  */
@@ -241,6 +244,11 @@  STATIC void *__JCR_LIST__[]
    = { };
  #endif /* JCR_SECTION_NAME */

+STATIC func_ptr __TMC_LIST__[]
+  __attribute__((unused, section(".tm_clone_table"), 
aligned(sizeof(void*))))
+  = { };
+extern func_ptr __TMC_END__[] __attribute__((__visibility__ ("hidden")));
+
  #if defined(INIT_SECTION_ASM_OP) || defined(INIT_ARRAY_SECTION_ASM_OP)

  #ifdef OBJECT_FORMAT_ELF
@@ -330,6 +338,13 @@  __do_global_dtors_aux (void)
    }
  #endif /* !defined(FINI_ARRAY_SECTION_ASM_OP) */

+  if (_ITM_deregisterTMCloneTable)
+    {
+      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
+      if (size > 0)
+	_ITM_deregisterTMCloneTable (__TMC_LIST__);
+    }
+
  #ifdef USE_EH_FRAME_REGISTRY
  #ifdef CRT_GET_RFIB_DATA
    /* If we used the new __register_frame_info_bases interface,
@@ -391,6 +406,12 @@  frame_dummy (void)
  	register_classes (__JCR_LIST__);
      }
  #endif /* JCR_SECTION_NAME */
+  if (_ITM_registerTMCloneTable)
+    {
+      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
+      if (size > 0)
+	_ITM_registerTMCloneTable (__TMC_LIST__, size);
+    }
  }

  #ifdef INIT_SECTION_ASM_OP
@@ -457,6 +478,13 @@  __do_global_dtors (void)
    for (p = __DTOR_LIST__ + 1; (f = *p); p++)
      f ();

+  if (_ITM_deregisterTMCloneTable)
+    {
+      size_t size = (size_t)(__TMC_END__ - __TMC_LIST__) / 2;
+      if (size > 0)
+	_ITM_deregisterTMCloneTable (__TMC_LIST__);
+    }
+
  #ifdef USE_EH_FRAME_REGISTRY
    if (__deregister_frame_info)
      __deregister_frame_info (__EH_FRAME_BEGIN__);
@@ -570,6 +598,11 @@  STATIC void *__JCR_END__[1]
     = { 0 };
  #endif /* JCR_SECTION_NAME */

+func_ptr __TMC_END__[]
+  __attribute__((unused, section(".tm_clone_table"), 
aligned(sizeof(void *)),
+		 __visibility__ ("hidden")))
+  = { };
+
  #ifdef INIT_ARRAY_SECTION_ASM_OP

  /* If we are using .init_array, there is nothing to do.  */
Index: gcc/cfgbuild.c
===================================================================
--- gcc/cfgbuild.c	(.../trunk)	(revision 180744)
+++ gcc/cfgbuild.c	(.../branches/transactional-memory)	(revision 180773)
@@ -338,18 +338,30 @@  make_edges (basic_block min, basic_block
  	  /* Add any appropriate EH edges.  */
  	  rtl_make_eh_edge (edge_cache, bb, insn);

-	  if (code == CALL_INSN && nonlocal_goto_handler_labels)
+	  if (code == CALL_INSN)
  	    {
-	      /* ??? This could be made smarter: in some cases it's possible
-		 to tell that certain calls will not do a nonlocal goto.
-		 For example, if the nested functions that do the nonlocal
-		 gotos do not have their addresses taken, then only calls to
-		 those functions or to other nested functions that use them
-		 could possibly do nonlocal gotos.  */
  	      if (can_nonlocal_goto (insn))
-		for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
-		  make_label_edge (edge_cache, bb, XEXP (x, 0),
-				   EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
+		{
+		  /* ??? This could be made smarter: in some cases it's
+		     possible to tell that certain calls will not do a
+		     nonlocal goto.  For example, if the nested functions
+		     that do the nonlocal gotos do not have their addresses
+		     taken, then only calls to those functions or to other
+		     nested functions that use them could possibly do
+		     nonlocal gotos.  */
+		  for (x = nonlocal_goto_handler_labels; x; x = XEXP (x, 1))
+		    make_label_edge (edge_cache, bb, XEXP (x, 0),
+				     EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
+		}
+
+	      if (flag_tm)
+		{
+		  rtx note;
+		  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+		    if (REG_NOTE_KIND (note) == REG_TM)
+		      make_label_edge (edge_cache, bb, XEXP (note, 0),
+				       EDGE_ABNORMAL | EDGE_ABNORMAL_CALL);
+		}
  	    }
  	}

Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def	(.../trunk)	(revision 180744)
+++ gcc/timevar.def	(.../branches/transactional-memory)	(revision 180773)
@@ -184,6 +184,7 @@  DEFTIMEVAR (TV_TREE_COPY_RENAME	     , "
  DEFTIMEVAR (TV_TREE_SSA_VERIFY       , "tree SSA verifier")
  DEFTIMEVAR (TV_TREE_STMT_VERIFY      , "tree STMT verifier")
  DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch initialization 
conversion")
+DEFTIMEVAR (TV_TRANS_MEM             , "transactional memory")
  DEFTIMEVAR (TV_TREE_STRLEN           , "tree strlen optimization")
  DEFTIMEVAR (TV_CGRAPH_VERIFY         , "callgraph verifier")
  DEFTIMEVAR (TV_DOM_FRONTIERS         , "dominance frontiers")
Index: gcc/recog.c
===================================================================
--- gcc/recog.c	(.../trunk)	(revision 180744)
+++ gcc/recog.c	(.../branches/transactional-memory)	(revision 180773)
@@ -3287,6 +3287,7 @@  peep2_attempt (basic_block bb, rtx insn,
  	  {
  	  case REG_NORETURN:
  	  case REG_SETJMP:
+	  case REG_TM:
  	    add_reg_note (new_insn, REG_NOTE_KIND (note),
  			  XEXP (note, 0));
  	    break;
Index: gcc/function.h
===================================================================
--- gcc/function.h	(.../trunk)	(revision 180744)
+++ gcc/function.h	(.../branches/transactional-memory)	(revision 180773)
@@ -467,6 +467,14 @@  extern GTY(()) struct rtl_data x_rtl;
     want to do differently.  */
  #define crtl (&x_rtl)

+/* This structure is used to map a gimple statement to a label,
+   or list of labels to represent transaction restart.  */
+
+struct GTY(()) tm_restart_node {
+  gimple stmt;
+  tree label_or_list;
+};
+
  struct GTY(()) stack_usage
  {
    /* # of bytes of static stack space allocated by the function.  */
@@ -518,6 +526,10 @@  struct GTY(()) function {
    /* Value histograms attached to particular statements.  */
    htab_t GTY((skip)) value_histograms;

+  /* Map gimple stmt to tree label (or list of labels) for transaction
+     restart and abort.  */
+  htab_t GTY ((param_is (struct tm_restart_node))) tm_restart;
+
    /* For function.c.  */

    /* Points to the FUNCTION_DECL of this function.  */
Index: gcc/emit-rtl.c
===================================================================
--- gcc/emit-rtl.c	(.../trunk)	(revision 180744)
+++ gcc/emit-rtl.c	(.../branches/transactional-memory)	(revision 180773)
@@ -3595,6 +3595,7 @@  try_split (rtx pat, rtx trial, int last)

  	case REG_NORETURN:
  	case REG_SETJMP:
+	case REG_TM:
  	  for (insn = insn_last; insn != NULL_RTX; insn = PREV_INSN (insn))
  	    {
  	      if (CALL_P (insn))
Index: gcc/cfgexpand.c
===================================================================
--- gcc/cfgexpand.c	(.../trunk)	(revision 180744)
+++ gcc/cfgexpand.c	(.../branches/transactional-memory)	(revision 180773)
@@ -2096,6 +2096,32 @@  expand_gimple_stmt (gimple stmt)
  	}
      }

+  /* Mark all calls that can have a transaction restart.  */
+  if (cfun->tm_restart && is_gimple_call (stmt))
+    {
+      struct tm_restart_node dummy;
+      void **slot;
+
+      dummy.stmt = stmt;
+      slot = htab_find_slot (cfun->tm_restart, &dummy, NO_INSERT);
+      if (slot)
+	{
+	  struct tm_restart_node *n = (struct tm_restart_node *) *slot;
+	  tree list = n->label_or_list;
+	  rtx insn;
+
+	  for (insn = next_real_insn (last); !CALL_P (insn);
+	       insn = next_real_insn (insn))
+	    continue;
+
+	  if (TREE_CODE (list) == LABEL_DECL)
+	    add_reg_note (insn, REG_TM, label_rtx (list));
+	  else
+	    for (; list ; list = TREE_CHAIN (list))
+	      add_reg_note (insn, REG_TM, label_rtx (TREE_VALUE (list)));
+	}
+    }
+
    return last;
  }

@@ -4455,6 +4481,10 @@  gimple_expand_cfg (void)
    /* After expanding, the return labels are no longer needed. */
    return_label = NULL;
    naked_return_label = NULL;
+
+  /* After expanding, the tm_restart map is no longer needed.  */
+  cfun->tm_restart = NULL;
+
    /* Tag the blocks with a depth number so that change_scope can find
       the common parent easily.  */
    set_block_levels (DECL_INITIAL (cfun->decl), 0);
Index: gcc/varasm.c
===================================================================
--- gcc/varasm.c	(.../trunk)	(revision 180744)
+++ gcc/varasm.c	(.../branches/transactional-memory)	(revision 180773)
@@ -5859,6 +5859,103 @@  assemble_alias (tree decl, tree target)
      }
  }

+/* Record and output a table of translations from original function
+   to its transaction aware clone.  Note that tm_pure functions are
+   considered to be their own clone.  */
+
+static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
+     htab_t tm_clone_pairs;
+
+void
+record_tm_clone_pair (tree o, tree n)
+{
+  struct tree_map **slot, *h;
+
+  if (tm_clone_pairs == NULL)
+    tm_clone_pairs = htab_create_ggc (32, tree_map_hash, tree_map_eq, 0);
+
+  h = ggc_alloc_tree_map ();
+  h->hash = htab_hash_pointer (o);
+  h->base.from = o;
+  h->to = n;
+
+  slot = (struct tree_map **)
+    htab_find_slot_with_hash (tm_clone_pairs, h, h->hash, INSERT);
+  *slot = h;
+}
+
+tree
+get_tm_clone_pair (tree o)
+{
+  if (tm_clone_pairs)
+    {
+      struct tree_map *h, in;
+
+      in.base.from = o;
+      in.hash = htab_hash_pointer (o);
+      h = (struct tree_map *) htab_find_with_hash (tm_clone_pairs,
+						   &in, in.hash);
+      if (h)
+	return h->to;
+    }
+  return NULL_TREE;
+}
+
+/* Helper function for finish_tm_clone_pairs.  Dump the clone table.  */
+
+int
+finish_tm_clone_pairs_1 (void **slot, void *info ATTRIBUTE_UNUSED)
+{
+  struct tree_map *map = (struct tree_map *) *slot;
+  bool *switched = (bool *) info;
+  tree src = map->base.from;
+  tree dst = map->to;
+  struct cgraph_node *src_n = cgraph_get_node (src);
+  struct cgraph_node *dst_n = cgraph_get_node (dst);
+
+  /* The function ipa_tm_create_version() marks the clone as needed if
+     the original function was needed.  But we also mark the clone as
+     needed if we ever called the clone indirectly through
+     TM_GETTMCLONE.  If neither of these are true, we didn't generate
+     a clone, and we didn't call it indirectly... no sense keeping it
+     in the clone table.  */
+  if (!dst_n || !dst_n->needed)
+    return 1;
+
+  /* This covers the case where we have optimized the original
+     function away, and only access the transactional clone.  */
+  if (!src_n || !src_n->needed)
+    return 1;
+
+  if (!*switched)
+    {
+      switch_to_section (get_named_section (NULL, ".tm_clone_table", 3));
+      assemble_align (POINTER_SIZE);
+      *switched = true;
+    }
+
+  assemble_integer (XEXP (DECL_RTL (src), 0),
+		    POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+  assemble_integer (XEXP (DECL_RTL (dst), 0),
+		    POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+  return 1;
+}
+
+void
+finish_tm_clone_pairs (void)
+{
+  bool switched = false;
+
+  if (tm_clone_pairs == NULL)
+    return;
+
+  htab_traverse_noresize (tm_clone_pairs, finish_tm_clone_pairs_1,
+			  (void *) &switched);
+  htab_delete (tm_clone_pairs);
+  tm_clone_pairs = NULL;
+}
+
+
  /* Emit an assembler directive to set symbol for DECL visibility to
     the visibility type VIS, which must not be VISIBILITY_DEFAULT.  */

Index: gcc/output.h
===================================================================
--- gcc/output.h	(.../trunk)	(revision 180744)
+++ gcc/output.h	(.../branches/transactional-memory)	(revision 180773)
@@ -606,6 +606,11 @@  extern bool unlikely_text_section_p (sec
  extern void switch_to_section (section *);
  extern void output_section_asm_op (const void *);

+extern void record_tm_clone_pair (tree, tree);
+extern void finish_tm_clone_pairs (void);
+extern int finish_tm_clone_pairs_1 (void **, void *);
+extern tree get_tm_clone_pair (tree);
+
  extern void default_asm_output_source_filename (FILE *, const char *);
  extern void output_file_directive (FILE *, const char *);

Index: gcc/combine.c
===================================================================
--- gcc/combine.c	(.../trunk)	(revision 180744)
+++ gcc/combine.c	(.../branches/transactional-memory)	(revision 180773)
@@ -13286,6 +13286,7 @@  distribute_notes (rtx notes, rtx from_in

  	case REG_NORETURN:
  	case REG_SETJMP:
+	case REG_TM:
  	  /* These notes must remain with the call.  It should not be
  	     possible for both I2 and I3 to be a call.  */
  	  if (CALL_P (i3))
Index: gcc/tree-flow.h
===================================================================
--- gcc/tree-flow.h	(.../trunk)	(revision 180744)
+++ gcc/tree-flow.h	(.../branches/transactional-memory)	(revision 180773)
@@ -778,6 +778,9 @@  extern bool maybe_duplicate_eh_stmt (gim
  extern bool verify_eh_edges (gimple);
  extern bool verify_eh_dispatch_edge (gimple);

+/* In gtm-low.c  */
+extern bool is_transactional_stmt (const_gimple);
+
  /* In tree-ssa-pre.c  */
  struct pre_expr_d;
  void add_to_value (unsigned int, struct pre_expr_d *);
Index: gcc/tree-ssa-structalias.c
===================================================================
--- gcc/tree-ssa-structalias.c	(.../trunk)	(revision 180744)
+++ gcc/tree-ssa-structalias.c	(.../branches/transactional-memory) 
(revision 180773)
@@ -4024,6 +4024,8 @@  find_func_aliases_for_builtin_call (gimp
        case BUILT_IN_STPCPY_CHK:
        case BUILT_IN_STRCAT_CHK:
        case BUILT_IN_STRNCAT_CHK:
+      case BUILT_IN_TM_MEMCPY:
+      case BUILT_IN_TM_MEMMOVE:
  	{
  	  tree res = gimple_call_lhs (t);
  	  tree dest = gimple_call_arg (t, (DECL_FUNCTION_CODE (fndecl)
@@ -4056,6 +4058,7 @@  find_func_aliases_for_builtin_call (gimp
  	}
        case BUILT_IN_MEMSET:
        case BUILT_IN_MEMSET_CHK:
+      case BUILT_IN_TM_MEMSET:
  	{
  	  tree res = gimple_call_lhs (t);
  	  tree dest = gimple_call_arg (t, 0);
@@ -4197,6 +4200,50 @@  find_func_aliases_for_builtin_call (gimp
  	    }
  	  return true;
  	}
+      CASE_BUILT_IN_TM_STORE (1):
+      CASE_BUILT_IN_TM_STORE (2):
+      CASE_BUILT_IN_TM_STORE (4):
+      CASE_BUILT_IN_TM_STORE (8):
+      CASE_BUILT_IN_TM_STORE (FLOAT):
+      CASE_BUILT_IN_TM_STORE (DOUBLE):
+      CASE_BUILT_IN_TM_STORE (LDOUBLE):
+      CASE_BUILT_IN_TM_STORE (M64):
+      CASE_BUILT_IN_TM_STORE (M128):
+      CASE_BUILT_IN_TM_STORE (M256):
+	{
+	  tree addr = gimple_call_arg (t, 0);
+	  tree src = gimple_call_arg (t, 1);
+
+	  get_constraint_for (addr, &lhsc);
+	  do_deref (&lhsc);
+	  get_constraint_for (src, &rhsc);
+	  process_all_all_constraints (lhsc, rhsc);
+	  VEC_free (ce_s, heap, lhsc);
+	  VEC_free (ce_s, heap, rhsc);
+	  return true;
+	}
+      CASE_BUILT_IN_TM_LOAD (1):
+      CASE_BUILT_IN_TM_LOAD (2):
+      CASE_BUILT_IN_TM_LOAD (4):
+      CASE_BUILT_IN_TM_LOAD (8):
+      CASE_BUILT_IN_TM_LOAD (FLOAT):
+      CASE_BUILT_IN_TM_LOAD (DOUBLE):
+      CASE_BUILT_IN_TM_LOAD (LDOUBLE):
+      CASE_BUILT_IN_TM_LOAD (M64):
+      CASE_BUILT_IN_TM_LOAD (M128):
+      CASE_BUILT_IN_TM_LOAD (M256):
+        {
+	  tree dest = gimple_call_lhs (t);
+	  tree addr = gimple_call_arg (t, 0);
+
+	  get_constraint_for (dest, &lhsc);
+	  get_constraint_for (addr, &rhsc);
+	  do_deref (&rhsc);
+	  process_all_all_constraints (lhsc, rhsc);
+	  VEC_free (ce_s, heap, lhsc);
+	  VEC_free (ce_s, heap, rhsc);
+	  return true;
+        }
        /* Variadic argument handling needs to be handled in IPA
  	 mode as well.  */
        case BUILT_IN_VA_START:
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(.../trunk)	(revision 180744)
+++ gcc/tree-cfg.c	(.../branches/transactional-memory)	(revision 180773)
@@ -666,6 +666,15 @@  make_edges (void)
  		}
  	      break;

+	    case GIMPLE_TRANSACTION:
+	      {
+		tree abort_label = gimple_transaction_label (last);
+		if (abort_label)
+		  make_edge (bb, label_to_block (abort_label), 0);
+		fallthru = true;
+	      }
+	      break;
+
  	    default:
  	      gcc_assert (!stmt_ends_bb_p (last));
  	      fallthru = true;
@@ -1196,22 +1205,30 @@  cleanup_dead_labels (void)
    FOR_EACH_BB (bb)
      {
        gimple stmt = last_stmt (bb);
+      tree label, new_label;
+
        if (!stmt)
  	continue;

        switch (gimple_code (stmt))
  	{
  	case GIMPLE_COND:
-	  {
-	    tree true_label = gimple_cond_true_label (stmt);
-	    tree false_label = gimple_cond_false_label (stmt);
+	  label = gimple_cond_true_label (stmt);
+	  if (label)
+	    {
+	      new_label = main_block_label (label);
+	      if (new_label != label)
+		gimple_cond_set_true_label (stmt, new_label);
+	    }

-	    if (true_label)
-	      gimple_cond_set_true_label (stmt, main_block_label (true_label));
-	    if (false_label)
-	      gimple_cond_set_false_label (stmt, main_block_label (false_label));
-	    break;
-	  }
+	  label = gimple_cond_false_label (stmt);
+	  if (label)
+	    {
+	      new_label = main_block_label (label);
+	      if (new_label != label)
+		gimple_cond_set_false_label (stmt, new_label);
+	    }
+	  break;

  	case GIMPLE_SWITCH:
  	  {
@@ -1221,8 +1238,10 @@  cleanup_dead_labels (void)
  	    for (i = 0; i < n; ++i)
  	      {
  		tree case_label = gimple_switch_label (stmt, i);
-		tree label = main_block_label (CASE_LABEL (case_label));
-		CASE_LABEL (case_label) = label;
+		label = CASE_LABEL (case_label);
+		new_label = main_block_label (label);
+		if (new_label != label)
+		  CASE_LABEL (case_label) = new_label;
  	      }
  	    break;
  	  }
@@ -1243,13 +1262,27 @@  cleanup_dead_labels (void)
  	/* We have to handle gotos until they're removed, and we don't
  	   remove them until after we've created the CFG edges.  */
  	case GIMPLE_GOTO:
-          if (!computed_goto_p (stmt))
+	  if (!computed_goto_p (stmt))
  	    {
-	      tree new_dest = main_block_label (gimple_goto_dest (stmt));
-	      gimple_goto_set_dest (stmt, new_dest);
+	      label = gimple_goto_dest (stmt);
+	      new_label = main_block_label (label);
+	      if (new_label != label)
+		gimple_goto_set_dest (stmt, new_label);
  	    }
  	  break;

+	case GIMPLE_TRANSACTION:
+	  {
+	    tree label = gimple_transaction_label (stmt);
+	    if (label)
+	      {
+		tree new_label = main_block_label (label);
+		if (new_label != label)
+		  gimple_transaction_set_label (stmt, new_label);
+	      }
+	  }
+	  break;
+
  	default:
  	  break;
        }
@@ -2263,6 +2296,13 @@  is_ctrl_altering_stmt (gimple t)
  	if (flags & ECF_NORETURN)
  	  return true;

+	/* TM ending statements have backedges out of the transaction.
+	   Return true so we split the basic block containing
+	   them.  */
+	if ((flags & ECF_TM_OPS)
+	    && is_tm_ending_fndecl (gimple_call_fndecl (t)))
+	  return true;
+
  	/* BUILT_IN_RETURN call is same as return statement.  */
  	if (gimple_call_builtin_p (t, BUILT_IN_RETURN))
  	  return true;
@@ -2284,6 +2324,10 @@  is_ctrl_altering_stmt (gimple t)
        /* OpenMP directives alter control flow.  */
        return true;

+    case GIMPLE_TRANSACTION:
+      /* A transaction start alters control flow.  */
+      return true;
+
      default:
        break;
      }
@@ -4054,6 +4098,17 @@  verify_gimple_switch (gimple stmt)
    return false;
  }

+/* Check a GIMPLE_TRANSACTION statement STMT: its label, when present,
+   must be a LABEL_DECL.  Returns true if anything is wrong.  */
+
+static bool
+verify_gimple_transaction (gimple stmt)
+{
+  tree abort_label = gimple_transaction_label (stmt);
+  /* A missing label is acceptable; only a non-label tree is an error.  */
+  return (abort_label != NULL
+	  && TREE_CODE (abort_label) != LABEL_DECL);
+}

  /* Verify a gimple debug statement STMT.
     Returns true if anything is wrong.  */
@@ -4155,6 +4210,9 @@  verify_gimple_stmt (gimple stmt)
      case GIMPLE_ASM:
        return false;

+    case GIMPLE_TRANSACTION:
+      return verify_gimple_transaction (stmt);
+
      /* Tuples that do not have tree operands.  */
      case GIMPLE_NOP:
      case GIMPLE_PREDICT:
@@ -4271,10 +4329,19 @@  verify_gimple_in_seq_2 (gimple_seq stmts
  	  err |= verify_gimple_in_seq_2 (gimple_eh_filter_failure (stmt));
  	  break;

+	case GIMPLE_EH_ELSE:
+	  err |= verify_gimple_in_seq_2 (gimple_eh_else_n_body (stmt));
+	  err |= verify_gimple_in_seq_2 (gimple_eh_else_e_body (stmt));
+	  break;
+
  	case GIMPLE_CATCH:
  	  err |= verify_gimple_in_seq_2 (gimple_catch_handler (stmt));
  	  break;

+	case GIMPLE_TRANSACTION:
+	  err |= verify_gimple_in_seq_2 (gimple_transaction_body (stmt));
+	  break;
+
  	default:
  	  {
  	    bool err2 = verify_gimple_stmt (stmt);
@@ -5052,6 +5119,14 @@  gimple_redirect_edge_and_branch (edge e,
  	redirect_eh_dispatch_edge (stmt, e, dest);
        break;

+    case GIMPLE_TRANSACTION:
+      /* The ABORT edge has a stored label associated with it; the other
+	 edges are simply redirectable.  */
+      /* ??? We don't really need this label after the cfg is created.  */
+      if (e->flags == 0)
+	gimple_transaction_set_label (stmt, gimple_block_label (dest));
+      break;
+
      default:
        /* Otherwise it must be a fallthru edge, and we don't need to
  	 do anything besides redirecting it.  */
@@ -6428,8 +6503,10 @@  dump_function_to_file (tree fn, FILE *fi
    bool ignore_topmost_bind = false, any_var = false;
    basic_block bb;
    tree chain;
+  bool tmclone = TREE_CODE (fn) == FUNCTION_DECL && DECL_IS_TM_CLONE (fn);

-  fprintf (file, "%s (", lang_hooks.decl_printable_name (fn, 2));
+  fprintf (file, "%s %s(", lang_hooks.decl_printable_name (fn, 2),
+	   tmclone ? "[tm-clone] " : "");

    arg = DECL_ARGUMENTS (fn);
    while (arg)
Index: gcc/passes.c
===================================================================
--- gcc/passes.c	(.../trunk)	(revision 180744)
+++ gcc/passes.c	(.../branches/transactional-memory)	(revision 180773)
@@ -1174,9 +1174,11 @@  init_optimization_passes (void)
    p = &all_lowering_passes;
    NEXT_PASS (pass_warn_unused_result);
    NEXT_PASS (pass_diagnose_omp_blocks);
+  NEXT_PASS (pass_diagnose_tm_blocks);
    NEXT_PASS (pass_mudflap_1);
    NEXT_PASS (pass_lower_omp);
    NEXT_PASS (pass_lower_cf);
+  NEXT_PASS (pass_lower_tm);
    NEXT_PASS (pass_refactor_eh);
    NEXT_PASS (pass_lower_eh);
    NEXT_PASS (pass_build_cfg);
@@ -1241,6 +1243,7 @@  init_optimization_passes (void)
      }
    NEXT_PASS (pass_ipa_increase_alignment);
    NEXT_PASS (pass_ipa_matrix_reorg);
+  NEXT_PASS (pass_ipa_tm);
    NEXT_PASS (pass_ipa_lower_emutls);
    *p = NULL;

@@ -1400,6 +1403,13 @@  init_optimization_passes (void)
        NEXT_PASS (pass_uncprop);
        NEXT_PASS (pass_local_pure_const);
      }
+  NEXT_PASS (pass_tm_init);
+    {
+      struct opt_pass **p = &pass_tm_init.pass.sub;
+      NEXT_PASS (pass_tm_mark);
+      NEXT_PASS (pass_tm_memopt);
+      NEXT_PASS (pass_tm_edges);
+    }
    NEXT_PASS (pass_lower_complex_O0);
    NEXT_PASS (pass_cleanup_eh);
    NEXT_PASS (pass_lower_resx);
Index: gcc/reg-notes.def
===================================================================
--- gcc/reg-notes.def	(.../trunk)	(revision 180744)
+++ gcc/reg-notes.def	(.../branches/transactional-memory)	(revision 180773)
@@ -203,6 +203,11 @@  REG_NOTE (CROSSING_JUMP)
     functions that can return twice.  */
  REG_NOTE (SETJMP)

+/* This kind of note is generated at each transactional memory
+   builtin, to indicate we need to generate transaction restart
+   edges for this insn.  */
+REG_NOTE (TM)
+
  /* Indicates the cumulative offset of the stack pointer accounting
     for pushed arguments.  This will only be generated when
     ACCUMULATE_OUTGOING_ARGS is false.  */
Index: gcc/cfgrtl.c
===================================================================
--- gcc/cfgrtl.c	(.../trunk)	(revision 180744)
+++ gcc/cfgrtl.c	(.../branches/transactional-memory)	(revision 180773)
@@ -2246,6 +2246,8 @@  purge_dead_edges (basic_block bb)
  	    ;
  	  else if ((e->flags & EDGE_EH) && can_throw_internal (insn))
  	    ;
+	  else if (flag_tm && find_reg_note (insn, REG_TM, NULL))
+	    ;
  	  else
  	    remove = true;
  	}
Index: gcc/params.def
===================================================================
--- gcc/params.def	(.../trunk)	(revision 180744)
+++ gcc/params.def	(.../branches/transactional-memory)	(revision 180773)
@@ -872,6 +872,13 @@  DEFPARAM (PARAM_IPA_SRA_PTR_GROWTH_FACTO
  	  "a pointer to an aggregate with",
  	  2, 0, 0)

+DEFPARAM (PARAM_TM_MAX_AGGREGATE_SIZE,
+	  "tm-max-aggregate-size",
+	  "Size in bytes after which thread-local aggregates should be "
+	  "instrumented with the logging functions instead of save/restore "
+	  "pairs",
+	  9, 0, 0)
+
  DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE,
  	  "ipa-cp-value-list-size",
  	  "Maximum size of a list of values associated with each parameter for "