diff mbox

generate dual code paths in transactions

Message ID 5099A6BB.1060403@redhat.com
State New
Headers show

Commit Message

Richard Henderson Nov. 7, 2012, 12:09 a.m. UTC
On 2012-11-05 19:44, Aldy Hernandez wrote:
> There are 2 distinct compiler failures, which look like missed optimizations which I will hopefully be tackling in stage2 (aka tomorrow).
> 
> FAIL: gcc.dg/tm/memopt-4.c scan-tree-dump-times tmedge "tm_save.[0-9_]+ = lala.x\\[55\\]" 1
> FAIL: gcc.dg/tm/memopt-4.c scan-tree-dump-times tmedge "lala.x\\[55\\] = tm_save" 1
> FAIL: gcc.dg/tm/memopt-7.c scan-tree-dump-times tmedge "tm_save.[0-9_]+ = lala" 1
> FAIL: gcc.dg/tm/memopt-7.c scan-tree-dump-times tmedge "lala = tm_save" 1

Patch 4, below, is a follow-on patch to the one Aldy posted which
fixes these problems as well.

> And there is a runtime error, for which Richard has a patch he is testing in a more favorable time zone:
> 
> FAIL: libitm.c/cancel.c (internal compiler error)

This is fixed by patch 1, below.

We expect transactions to be single entry multiple exit regions.  We cannot
allow sharing of basic blocks between regions, as that breaks the expectation.
When such sharing occurs, the tm_edge transformation creates cycles that
result in SSA defs not dominating their uses.  Which of course fails verification.

I considered having the tm_init pass duplicate blocks as necessary to undo
such sharing.  This would be the most robust approach to changes that other
gimple-level optimizations.  But that was too much work to consider as a
stage3 bug fix.

Patches 2 and 3 are split out from the patch Aldy posted.  I thought they
deserved to be committed separately.


r~
diff mbox

Patch

From bbdebfc381fa2463bcb6259c359d8e8f93b6c929 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@redhat.com>
Date: Tue, 6 Nov 2012 15:51:39 -0800
Subject: [PATCH 4/4] tm: Add uninstrumented code path

---
 gcc/ChangeLog                               |  23 +
 gcc/cfg-flags.def                           |   6 +
 gcc/testsuite/ChangeLog                     |  12 +
 gcc/testsuite/c-c++-common/tm/trxn-expr-3.c |   2 +-
 gcc/testsuite/gcc.dg/tm/debug-1.c           |   2 +-
 gcc/testsuite/gcc.dg/tm/irrevocable-3.c     |   2 +-
 gcc/testsuite/gcc.dg/tm/irrevocable-4.c     |   2 +-
 gcc/testsuite/gcc.dg/tm/memopt-10.c         |   2 +-
 gcc/testsuite/gcc.dg/tm/memopt-11.c         |   2 +-
 gcc/testsuite/gcc.dg/tm/props-4.c           |   1 -
 gcc/testsuite/gcc.dg/tm/wrap-3.c            |   5 +-
 gcc/testsuite/gcc.dg/tm/wrap-4.c            |   5 +-
 gcc/trans-mem.c                             | 873 ++++++++++++++++++----------
 gcc/trans-mem.h                             |   1 +
 gcc/tree-cfg.c                              |   6 +-
 15 files changed, 631 insertions(+), 313 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ef8173e..5bc5de3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,27 @@ 
 2012-11-06  Aldy Hernandez  <aldyh@redhat.com>
+            Richard Henderson  <rth@redhat.com>
+
+	* cfg-flags.def (TM_UNINSTRUMENTED, TM_ABORT): New.
+	* trans-mem.c (PROB_VERY_LIKELY, PROB_UNLIKELY, PROB_LIKELY): New.
+	(struct tm_region): Add tm_state, restart_block,
+	original_transaction_was_outer.
+	(tm_region_init_0): Initialize them.
+	(collect_bb2reg, get_bb_regions_instrumented): New.
+	(tm_log_emit_save_or_restores): Merge into ...
+	(expand_transaction): ... here.
+	(generate_tm_state, propagate_tm_flags_out): New.
+	(execute_tm_mark): Avoid processing blocks more than once.
+	(split_bb_make_tm_edge): Split out from ...
+	(make_tm_edge): ... here.  And merge the rest into ...
+	(expand_block_edges): ... here.
+	(execute_tm_edges): Avoid processing blocks more than once.
+	(ipa_uninstrument_transaction): New.
+	(ipa_tm_scan_calls_transaction): Use it.  Rebuild cgraph and ssa.
+	(ipa_tm_execute): Init and free original_copy_tables.
+	* trans-mem.h (PR_MULTIWAYCODE): New.
+	* tree-cfg.c (make_edges): Use EDGE_TM_ABORT.
+
+2012-11-06  Aldy Hernandez  <aldyh@redhat.com>
 
 	* cfghooks.c (copy_bbs): Handle a null loop_father.
 
diff --git a/gcc/cfg-flags.def b/gcc/cfg-flags.def
index e9e2dd6..aea8f06 100644
--- a/gcc/cfg-flags.def
+++ b/gcc/cfg-flags.def
@@ -171,6 +171,12 @@  DEF_EDGE_FLAG(CAN_FALLTHRU, 13)
    This flag is only used for the RTL CFG.  */
 DEF_EDGE_FLAG(LOOP_EXIT, 14)
 
+/* Uninstrumented edge out of a GIMPLE_TRANSACTION statement.  */
+DEF_EDGE_FLAG(TM_UNINSTRUMENTED, 15)
+
+/* Abort (over) edge out of a GIMPLE_TRANSACTION statement.  */
+DEF_EDGE_FLAG(TM_ABORT, 16)
+
 #endif
 
 /*
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ac803f7..654ae5f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,15 @@ 
+2012-11-06  Aldy Hernandez  <aldyh@redhat.com>
+
+	* c-c++-common/tm/trxn-expr-3.c: Adjust for uninstrumented code path.
+	* gcc.dg/tm/debug-1.c: Same.
+	* gcc.dg/tm/irrevocable-3.c: Same.
+	* gcc.dg/tm/irrevocable-4.c: Same.
+	* gcc.dg/tm/memopt-10.c: Same.
+	* gcc.dg/tm/memopt-11.c: Same.
+	* gcc.dg/tm/props-4.c: Same.
+	* gcc.dg/tm/wrap-3.c: Same.
+	* gcc.dg/tm/wrap-4.c: Same.
+
 2012-11-06  Jan Hubicka  <jh@suse.cz>
 
 	* gcc.dg/tree-ssa/loop-1.c: Make to look like a good unroling candidate still.
diff --git a/gcc/testsuite/c-c++-common/tm/trxn-expr-3.c b/gcc/testsuite/c-c++-common/tm/trxn-expr-3.c
index 0a87780..db66873 100644
--- a/gcc/testsuite/c-c++-common/tm/trxn-expr-3.c
+++ b/gcc/testsuite/c-c++-common/tm/trxn-expr-3.c
@@ -10,5 +10,5 @@  int f2()
 }
 
 /* { dg-final { scan-tree-dump-times "ITM_RU" 2 "tmmark" } } */
-/* { dg-final { scan-tree-dump-times "ITM_commitTransaction" 2 "tmmark" } } */
+/* { dg-final { scan-tree-dump-times "ITM_commitTransaction" 4 "tmmark" } } */
 /* { dg-final { cleanup-tree-dump "tmmark" } } */
diff --git a/gcc/testsuite/gcc.dg/tm/debug-1.c b/gcc/testsuite/gcc.dg/tm/debug-1.c
index fae5d6b..01acfae 100644
--- a/gcc/testsuite/gcc.dg/tm/debug-1.c
+++ b/gcc/testsuite/gcc.dg/tm/debug-1.c
@@ -20,7 +20,7 @@  main() {
 }
 
 /* { dg-final { scan-tree-dump-times ": 13:.*b = 9898" 1 "tmmark" } } */
-/* { dg-final { scan-tree-dump-times ": 14:.*__transaction" 1 "tmmark" } } */
+/* { dg-final { scan-tree-dump-times ": 14:.*_ITM_beginTransaction" 1 "tmmark" } } */
 /* { dg-final { scan-tree-dump-times ": 15:.*ITM_WU. \\(&z" 1 "tmmark" } } */
 /* { dg-final { scan-tree-dump-times ": 16:.*ITM_WU. \\(&a" 1 "tmmark" } } */
 /* { dg-final { cleanup-tree-dump "tmmark" } } */
diff --git a/gcc/testsuite/gcc.dg/tm/irrevocable-3.c b/gcc/testsuite/gcc.dg/tm/irrevocable-3.c
index c085479..fdf3e52 100644
--- a/gcc/testsuite/gcc.dg/tm/irrevocable-3.c
+++ b/gcc/testsuite/gcc.dg/tm/irrevocable-3.c
@@ -10,5 +10,5 @@  foo()
 	}
 }
 
-/* { dg-final { scan-tree-dump-times "GTMA_MAY_ENTER_IRREVOCABLE" 1 "tmmark" } } */
+/* { dg-final { scan-tree-dump-times "doesGoIrrevocable" 1 "tmmark" } } */
 /* { dg-final { cleanup-tree-dump "tmmark" } } */
diff --git a/gcc/testsuite/gcc.dg/tm/irrevocable-4.c b/gcc/testsuite/gcc.dg/tm/irrevocable-4.c
index ee759b8..72075df 100644
--- a/gcc/testsuite/gcc.dg/tm/irrevocable-4.c
+++ b/gcc/testsuite/gcc.dg/tm/irrevocable-4.c
@@ -12,5 +12,5 @@  foo()
 	}
 }
 
-/* { dg-final { scan-tree-dump-times "GTMA_MAY_ENTER_IRREVOCABLE" 1 "tmmark" } } */
+/* { dg-final { scan-tree-dump-times "hasNoIrrevocable" 0 "tmmark" } } */
 /* { dg-final { cleanup-tree-dump "tmmark" } } */
diff --git a/gcc/testsuite/gcc.dg/tm/memopt-10.c b/gcc/testsuite/gcc.dg/tm/memopt-10.c
index 5caa6b5..0978bce 100644
--- a/gcc/testsuite/gcc.dg/tm/memopt-10.c
+++ b/gcc/testsuite/gcc.dg/tm/memopt-10.c
@@ -24,5 +24,5 @@  int f()
 
 /* { dg-final { scan-tree-dump-times "ITM_LU" 0 "tmmark" } } */
 /* { dg-final { scan-tree-dump-times "ITM_WU" 0 "tmmark" } } */
-/* { dg-final { scan-tree-dump-times "tm_save" 1 "tmmark" } } */
+/* { dg-final { scan-tree-dump-times "int tm_save" 1 "tmmark" } } */
 /* { dg-final { cleanup-tree-dump "tmmark" } } */
diff --git a/gcc/testsuite/gcc.dg/tm/memopt-11.c b/gcc/testsuite/gcc.dg/tm/memopt-11.c
index 07972a4..36aa664 100644
--- a/gcc/testsuite/gcc.dg/tm/memopt-11.c
+++ b/gcc/testsuite/gcc.dg/tm/memopt-11.c
@@ -25,5 +25,5 @@  int f()
 
 /* { dg-final { scan-tree-dump-times "ITM_LU" 0 "tmmark" } } */
 /* { dg-final { scan-tree-dump-times "ITM_WU" 0 "tmmark" } } */
-/* { dg-final { scan-tree-dump-times "tm_save" 1 "tmmark" } } */
+/* { dg-final { scan-tree-dump-times "int tm_save" 1 "tmmark" } } */
 /* { dg-final { cleanup-tree-dump "tmmark" } } */
diff --git a/gcc/testsuite/gcc.dg/tm/props-4.c b/gcc/testsuite/gcc.dg/tm/props-4.c
index c9d0c2b..c34f5e6 100644
--- a/gcc/testsuite/gcc.dg/tm/props-4.c
+++ b/gcc/testsuite/gcc.dg/tm/props-4.c
@@ -22,6 +22,5 @@  foo(void)
 
 /* { dg-final { scan-tree-dump-times " instrumentedCode" 1 "tmedge" } } */
 /* { dg-final { scan-tree-dump-times "hasNoAbort" 0 "tmedge" } } */
-/* { dg-final { scan-tree-dump-times "LABEL=<L0>" 1 "tmmark" } } */
 /* { dg-final { cleanup-tree-dump "tmedge" } } */
 /* { dg-final { cleanup-tree-dump "tmmark" } } */
diff --git a/gcc/testsuite/gcc.dg/tm/wrap-3.c b/gcc/testsuite/gcc.dg/tm/wrap-3.c
index 0734436..6732471 100644
--- a/gcc/testsuite/gcc.dg/tm/wrap-3.c
+++ b/gcc/testsuite/gcc.dg/tm/wrap-3.c
@@ -10,5 +10,8 @@  void foo()
   __transaction_relaxed { free (p); }
 }
 
-/* { dg-final { scan-tree-dump-times "free" 0 "optimized" } } */
+/* We still have one call to free()-- on the uninstrumented path
+   everything is as usual.  */
+/* { dg-final { scan-tree-dump-times "free" 1 "optimized" } } */
+
 /* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tm/wrap-4.c b/gcc/testsuite/gcc.dg/tm/wrap-4.c
index 9e1e70c..9a4ec06 100644
--- a/gcc/testsuite/gcc.dg/tm/wrap-4.c
+++ b/gcc/testsuite/gcc.dg/tm/wrap-4.c
@@ -11,5 +11,8 @@  void foo()
   __transaction_relaxed { candy(); }
 }
 
-/* { dg-final { scan-tree-dump-times "candy" 0 "optimized" } } */
+/* We still have one call to candy()-- on the uninstrumented path
+   everything is as usual.  */
+/* { dg-final { scan-tree-dump-times "candy \\(\\);" 1 "optimized" } } */
+
 /* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/trans-mem.c b/gcc/trans-mem.c
index b9774d4..642e088 100644
--- a/gcc/trans-mem.c
+++ b/gcc/trans-mem.c
@@ -37,6 +37,9 @@ 
 
 
 #define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 2000 - 1)
+#define PROB_VERY_LIKELY	(PROB_ALWAYS - PROB_VERY_UNLIKELY)
+#define PROB_UNLIKELY		(REG_BR_PROB_BASE / 5 - 1)
+#define PROB_LIKELY		(PROB_ALWAYS - PROB_VERY_LIKELY)
 #define PROB_ALWAYS		(REG_BR_PROB_BASE)
 
 #define A_RUNINSTRUMENTEDCODE	0x0001
@@ -133,6 +136,10 @@ 
 	over:
 */
 
+static void *expand_regions (struct tm_region *,
+			     void *(*callback)(struct tm_region *, void *),
+			     void *);
+
 
 /* Return the attributes we want to examine for X, or NULL if it's not
    something we examine.  We look at function types, but allow pointers
@@ -1243,72 +1250,6 @@  tm_log_emit_restores (basic_block entry_block, basic_block bb)
     }
 }
 
-/* Emit the checks for performing either a save or a restore sequence.
-
-   TRXN_PROP is either A_SAVELIVEVARIABLES or A_RESTORELIVEVARIABLES.
-
-   The code sequence is inserted in a new basic block created in
-   END_BB which is inserted between BEFORE_BB and the destination of
-   FALLTHRU_EDGE.
-
-   STATUS is the return value from _ITM_beginTransaction.
-   ENTRY_BLOCK is the entry block for the transaction.
-   EMITF is a callback to emit the actual save/restore code.
-
-   The basic block containing the conditional checking for TRXN_PROP
-   is returned.  */
-static basic_block
-tm_log_emit_save_or_restores (basic_block entry_block,
-			      unsigned trxn_prop,
-			      tree status,
-			      void (*emitf)(basic_block, basic_block),
-			      basic_block before_bb,
-			      edge fallthru_edge,
-			      basic_block *end_bb)
-{
-  basic_block cond_bb, code_bb;
-  gimple cond_stmt, stmt;
-  gimple_stmt_iterator gsi;
-  tree t1, t2;
-  int old_flags = fallthru_edge->flags;
-
-  cond_bb = create_empty_bb (before_bb);
-  code_bb = create_empty_bb (cond_bb);
-  *end_bb = create_empty_bb (code_bb);
-  if (current_loops && before_bb->loop_father)
-    {
-      add_bb_to_loop (cond_bb, before_bb->loop_father);
-      add_bb_to_loop (code_bb, before_bb->loop_father);
-      add_bb_to_loop (*end_bb, before_bb->loop_father);
-    }
-  redirect_edge_pred (fallthru_edge, *end_bb);
-  fallthru_edge->flags = EDGE_FALLTHRU;
-  make_edge (before_bb, cond_bb, old_flags);
-
-  set_immediate_dominator (CDI_DOMINATORS, cond_bb, before_bb);
-  set_immediate_dominator (CDI_DOMINATORS, code_bb, cond_bb);
-
-  gsi = gsi_last_bb (cond_bb);
-
-  /* t1 = status & A_{property}.  */
-  t1 = create_tmp_reg (TREE_TYPE (status), NULL);
-  t2 = build_int_cst (TREE_TYPE (status), trxn_prop);
-  stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, t1, status, t2);
-  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
-
-  /* if (t1).  */
-  t2 = build_int_cst (TREE_TYPE (status), 0);
-  cond_stmt = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
-  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
-
-  emitf (entry_block, code_bb);
-
-  make_edge (cond_bb, code_bb, EDGE_TRUE_VALUE);
-  make_edge (cond_bb, *end_bb, EDGE_FALSE_VALUE);
-  make_edge (code_bb, *end_bb, EDGE_FALLTHRU);
-
-  return cond_bb;
-}
 
 static tree lower_sequence_tm (gimple_stmt_iterator *, bool *,
 			       struct walk_stmt_info *);
@@ -1767,12 +1708,26 @@  struct tm_region
   /* Link to the next outer transaction.  */
   struct tm_region *outer;
 
-  /* The GIMPLE_TRANSACTION statement beginning this transaction.  */
+  /* The GIMPLE_TRANSACTION statement beginning this transaction.
+     After TM_MARK, this gets replaced by a call to
+     BUILT_IN_TM_START.  */
   gimple transaction_stmt;
 
-  /* The entry block to this region.  */
+  /* After TM_MARK expands the GIMPLE_TRANSACTION into a call to
+     BUILT_IN_TM_START, this field is true if the transaction is an
+     outer transaction.  */
+  bool original_transaction_was_outer;
+
+  /* Return value from BUILT_IN_TM_START.  */
+  tree tm_state;
+
+  /* The entry block to this region.  This will always be the first
+     block of the body of the transaction.  */
   basic_block entry_block;
 
+  /* The first block after an expanded call to _ITM_beginTransaction.  */
+  basic_block restart_block;
+
   /* The set of all blocks that end the region; NULL if only EXIT_BLOCK.
      These blocks are still a part of the region (i.e., the border is
      inclusive). Note that this set is only complete for paths in the CFG
@@ -1821,6 +1776,8 @@  tm_region_init_0 (struct tm_region *outer, basic_block bb, gimple stmt)
   region->outer = outer;
 
   region->transaction_stmt = stmt;
+  region->original_transaction_was_outer = false;
+  region->tm_state = NULL;
 
   /* There are either one or two edges out of the block containing
      the GIMPLE_TRANSACTION, one to the actual region and one to the
@@ -2195,6 +2152,7 @@  expand_assign_tm (struct tm_region *region, gimple_stmt_iterator *gsi)
       return;
     }
 
+  // Remove original load/store statement.
   gsi_remove (gsi, true);
 
   if (load_p && !store_p)
@@ -2248,7 +2206,9 @@  expand_assign_tm (struct tm_region *region, gimple_stmt_iterator *gsi)
   if (!store_p)
     requires_barrier (region->entry_block, lhs, gcall);
 
-  /* add_stmt_to_tm_region  (region, gcall); */
+  // The calls to build_tm_{store,load} above inserted the instrumented
+  // call into the stream.
+  // gsi_insert_before (gsi, gcall, GSI_SAME_STMT);
 }
 
 
@@ -2484,6 +2444,65 @@  get_tm_region_blocks (basic_block entry_block,
   return bbs;
 }
 
+// Callback for expand_regions, collect innermost region data for each bb.
+static void *
+collect_bb2reg (struct tm_region *region, void *data)
+{
+  VEC(tm_region_p, heap) *bb2reg = (VEC(tm_region_p, heap) *) data;
+  VEC (basic_block, heap) *queue;
+  unsigned int i;
+  basic_block bb;
+
+  queue = get_tm_region_blocks (region->entry_block,
+				region->exit_blocks,
+				region->irr_blocks,
+				NULL,
+				/*stop_at_irr_p=*/false);
+
+  // We expect expand_region to perform a post-order traversal of the region
+  // tree.  Therefore the last region seen for any bb is the innermost.
+  FOR_EACH_VEC_ELT (basic_block, queue, i, bb)
+    VEC_replace (tm_region_p, bb2reg, bb->index, region);
+
+  VEC_free (basic_block, heap, queue);
+  return NULL;
+}
+
+// Returns a vector, indexed by BB->INDEX, of the innermost tm_region to
+// which a basic block belongs.  Note that we only consider the instrumented
+// code paths for the region; the uninstrumented code paths are ignored.
+//
+// ??? This data is very similar to the bb_regions array that is collected
+// during tm_region_init.  Or, rather, this data is similar to what could
+// be used within tm_region_init.  The actual computation in tm_region_init
+// begins and ends with bb_regions entirely full of NULL pointers, due to
+// the way in which pointers are swapped in and out of the array.
+//
+// ??? Our callers expect that blocks are not shared between transactions.
+// When the optimizers get too smart, and blocks are shared, then during
+// the tm_mark phase we'll add log entries to only one of the two transactions,
+// and in the tm_edge phase we'll add edges to the CFG that create invalid
+// cycles.  The symptom being SSA defs that do not dominate their uses.
+// Note that the optimizers were locally correct with their transformation,
+// as we have no info within the program that suggests that the blocks cannot
+// be shared.
+//
+// ??? There is currently a hack inside tree-ssa-pre.c to work around the
+// only known instance of this block sharing.
+
+static VEC(tm_region_p, heap) *
+get_bb_regions_instrumented (void)
+{
+  unsigned n = last_basic_block;
+  VEC(tm_region_p, heap) *ret;
+
+  ret = VEC_alloc (tm_region_p, heap, n);
+  VEC_safe_grow_cleared (tm_region_p, heap, ret, n);
+  expand_regions (all_tm_regions, collect_bb2reg, ret);
+
+  return ret;
+}
+
 /* Set the IN_TRANSACTION for all gimple statements that appear in a
    transaction.  */
 
@@ -2518,6 +2537,285 @@  compute_transaction_bits (void)
     bitmap_obstack_release (&tm_obstack);
 }
 
+/* Replace the GIMPLE_TRANSACTION in this region with the corresponding
+   call to BUILT_IN_TM_START.  */
+
+static void *
+expand_transaction (struct tm_region *region, void *data ATTRIBUTE_UNUSED)
+{
+  tree tm_start = builtin_decl_explicit (BUILT_IN_TM_START);
+  basic_block transaction_bb = gimple_bb (region->transaction_stmt);
+  tree tm_state = region->tm_state;
+  tree tm_state_type = TREE_TYPE (tm_state);
+  edge abort_edge = NULL;
+  edge inst_edge = NULL;
+  edge uninst_edge = NULL;
+  edge fallthru_edge = NULL;
+
+  // Identify the various successors of the transaction start.
+  {
+    edge_iterator i;
+    edge e;
+    FOR_EACH_EDGE (e, i, transaction_bb->succs)
+      {
+        if (e->flags & EDGE_TM_ABORT)
+	  abort_edge = e;
+        else if (e->flags & EDGE_TM_UNINSTRUMENTED)
+	  uninst_edge = e;
+	else
+	  inst_edge = e;
+        if (e->flags & EDGE_FALLTHRU)
+	  fallthru_edge = e;
+      }
+  }
+
+  /* ??? There are plenty of bits here we're not computing.  */
+  {
+    int subcode = gimple_transaction_subcode (region->transaction_stmt);
+    int flags = 0;
+    if (subcode & GTMA_DOES_GO_IRREVOCABLE)
+      flags |= PR_DOESGOIRREVOCABLE;
+    if ((subcode & GTMA_MAY_ENTER_IRREVOCABLE) == 0)
+      flags |= PR_HASNOIRREVOCABLE;
+    /* If the transaction does not have an abort in lexical scope and is not
+       marked as an outer transaction, then it will never abort.  */
+    if ((subcode & GTMA_HAVE_ABORT) == 0 && (subcode & GTMA_IS_OUTER) == 0)
+      flags |= PR_HASNOABORT;
+    if ((subcode & GTMA_HAVE_STORE) == 0)
+      flags |= PR_READONLY;
+    if (inst_edge)
+      flags |= PR_INSTRUMENTEDCODE;
+    if (uninst_edge)
+      flags |= PR_UNINSTRUMENTEDCODE;
+    if (subcode & GTMA_IS_OUTER)
+      region->original_transaction_was_outer = true;
+    tree t = build_int_cst (tm_state_type, flags);
+    gimple call = gimple_build_call (tm_start, 1, t);
+    gimple_call_set_lhs (call, tm_state);
+    gimple_set_location (call, gimple_location (region->transaction_stmt));
+
+    // Replace the GIMPLE_TRANSACTION with the call to BUILT_IN_TM_START.
+    gimple_stmt_iterator gsi = gsi_last_bb (transaction_bb);
+    gcc_assert (gsi_stmt (gsi) == region->transaction_stmt);
+    gsi_insert_before (&gsi, call, GSI_SAME_STMT);
+    gsi_remove (&gsi, true);
+    region->transaction_stmt = call;
+  }
+
+  // Generate log saves.
+  if (!VEC_empty (tree, tm_log_save_addresses))
+    tm_log_emit_saves (region->entry_block, transaction_bb);
+
+  // In the beginning, we've no tests to perform on transaction restart.
+  // Note that after this point, transaction_bb becomes the "most recent
+  // block containing tests for the transaction".
+  region->restart_block = region->entry_block;
+
+  // Generate log restores.
+  if (!VEC_empty (tree, tm_log_save_addresses))
+    {
+      basic_block test_bb = create_empty_bb (transaction_bb);
+      basic_block code_bb = create_empty_bb (test_bb);
+      basic_block join_bb = create_empty_bb (code_bb);
+      if (current_loops && transaction_bb->loop_father)
+	{
+	  add_bb_to_loop (test_bb, transaction_bb->loop_father);
+	  add_bb_to_loop (code_bb, transaction_bb->loop_father);
+	  add_bb_to_loop (join_bb, transaction_bb->loop_father);
+	}
+      if (region->restart_block == region->entry_block)
+	region->restart_block = test_bb;
+
+      tree t1 = create_tmp_reg (tm_state_type, NULL);
+      tree t2 = build_int_cst (tm_state_type, A_RESTORELIVEVARIABLES);
+      gimple stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, t1,
+						  tm_state, t2);
+      gimple_stmt_iterator gsi = gsi_last_bb (test_bb);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+      t2 = build_int_cst (tm_state_type, 0);
+      stmt = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+      tm_log_emit_restores (region->entry_block, code_bb);
+
+      edge ei = make_edge (transaction_bb, test_bb, EDGE_FALLTHRU);
+      edge et = make_edge (test_bb, code_bb, EDGE_TRUE_VALUE);
+      edge ef = make_edge (test_bb, join_bb, EDGE_FALSE_VALUE);
+      redirect_edge_pred (fallthru_edge, join_bb);
+
+      join_bb->frequency = test_bb->frequency = transaction_bb->frequency;
+      join_bb->count = test_bb->count = transaction_bb->count;
+
+      ei->probability = PROB_ALWAYS;
+      et->probability = PROB_LIKELY;
+      ef->probability = PROB_UNLIKELY;
+      et->count = apply_probability(test_bb->count, et->probability);
+      ef->count = apply_probability(test_bb->count, ef->probability);
+
+      code_bb->count = et->count;
+      code_bb->frequency = EDGE_FREQUENCY (et);
+
+      transaction_bb = join_bb;
+    }
+
+  // If we have an ABORT edge, create a test to perform the abort.
+  if (abort_edge)
+    {
+      basic_block test_bb = create_empty_bb (transaction_bb);
+      if (current_loops && transaction_bb->loop_father)
+	add_bb_to_loop (test_bb, transaction_bb->loop_father);
+      if (region->restart_block == region->entry_block)
+	region->restart_block = test_bb;
+
+      tree t1 = create_tmp_reg (tm_state_type, NULL);
+      tree t2 = build_int_cst (tm_state_type, A_ABORTTRANSACTION);
+      gimple stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, t1,
+						  tm_state, t2);
+      gimple_stmt_iterator gsi = gsi_last_bb (test_bb);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+      t2 = build_int_cst (tm_state_type, 0);
+      stmt = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+      edge ei = make_edge (transaction_bb, test_bb, EDGE_FALLTHRU);
+      test_bb->frequency = transaction_bb->frequency;
+      test_bb->count = transaction_bb->count;
+      ei->probability = PROB_ALWAYS;
+
+      // Not abort edge.  If both are live, chose one at random as we'll
+      // we'll be fixing that up below.
+      redirect_edge_pred (fallthru_edge, test_bb);
+      fallthru_edge->flags = EDGE_FALSE_VALUE;
+      fallthru_edge->probability = PROB_VERY_LIKELY;
+      fallthru_edge->count
+	= apply_probability(test_bb->count, fallthru_edge->probability);
+
+      // Abort/over edge.
+      redirect_edge_pred (abort_edge, test_bb);
+      abort_edge->flags = EDGE_TRUE_VALUE;
+      abort_edge->probability = PROB_VERY_UNLIKELY;
+      abort_edge->count
+	= apply_probability(test_bb->count, abort_edge->probability);
+
+      transaction_bb = test_bb;
+    }
+
+  // If we have both instrumented and uninstrumented code paths, select one.
+  if (inst_edge && uninst_edge)
+    {
+      basic_block test_bb = create_empty_bb (transaction_bb);
+      if (current_loops && transaction_bb->loop_father)
+	add_bb_to_loop (test_bb, transaction_bb->loop_father);
+      if (region->restart_block == region->entry_block)
+	region->restart_block = test_bb;
+
+      tree t1 = create_tmp_reg (tm_state_type, NULL);
+      tree t2 = build_int_cst (tm_state_type, A_RUNUNINSTRUMENTEDCODE);
+
+      gimple stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, t1,
+						  tm_state, t2);
+      gimple_stmt_iterator gsi = gsi_last_bb (test_bb);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+      t2 = build_int_cst (tm_state_type, 0);
+      stmt = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
+      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+
+      // Create the edge into test_bb first, as we want to copy values
+      // out of the fallthru edge.
+      edge e = make_edge (transaction_bb, test_bb, fallthru_edge->flags);
+      e->probability = fallthru_edge->probability;
+      test_bb->count = e->count = fallthru_edge->count;
+      test_bb->frequency = EDGE_FREQUENCY (e);
+
+      // Now update the edges to the inst/uninist implementations.
+      // For now assume that the paths are equally likely.  When using HTM,
+      // we'll try the uninst path first and fallback to inst path if htm
+      // buffers are exceeded.  Without HTM we start with the inst path and
+      // use the uninst path when falling back to serial mode.
+      redirect_edge_pred (inst_edge, test_bb);
+      inst_edge->flags = EDGE_FALSE_VALUE;
+      inst_edge->probability = REG_BR_PROB_BASE / 2;
+      inst_edge->count
+	= apply_probability(test_bb->count, inst_edge->probability);
+
+      redirect_edge_pred (uninst_edge, test_bb);
+      uninst_edge->flags = EDGE_TRUE_VALUE;
+      uninst_edge->probability = REG_BR_PROB_BASE / 2;
+      uninst_edge->count
+	= apply_probability(test_bb->count, uninst_edge->probability);
+    }
+
+  // If we have no previous special cases, and we have PHIs at the beginning
+  // of the atomic region, this means we have a loop at the beginning of the
+  // atomic region that shares the first block.  This can cause problems with
+  // the transaction restart abnormal edges to be added in the tm_edges pass.
+  // Solve this by adding a new empty block to receive the abnormal edges.
+  if (region->restart_block == region->entry_block
+      && phi_nodes (region->entry_block))
+    {
+      basic_block empty_bb = create_empty_bb (transaction_bb);
+      region->restart_block = empty_bb;
+      if (current_loops && transaction_bb->loop_father)
+	add_bb_to_loop (empty_bb, transaction_bb->loop_father);
+
+      redirect_edge_pred (fallthru_edge, empty_bb);
+      make_edge (transaction_bb, empty_bb, EDGE_FALLTHRU);
+    }
+
+  return NULL;
+}
+
+/* Generate the temporary to be used for the return value of
+   BUILT_IN_TM_START.  */
+
+static void *
+generate_tm_state (struct tm_region *region, void *data ATTRIBUTE_UNUSED)
+{
+  tree tm_start = builtin_decl_explicit (BUILT_IN_TM_START);
+  region->tm_state =
+    create_tmp_reg (TREE_TYPE (TREE_TYPE (tm_start)), "tm_state");
+
+  // Reset the subcode, post optimizations.  We'll fill this in
+  // again as we process blocks.
+  if (region->exit_blocks)
+    {
+      unsigned int subcode
+	= gimple_transaction_subcode (region->transaction_stmt);
+
+      if (subcode & GTMA_DOES_GO_IRREVOCABLE)
+	subcode &= (GTMA_DECLARATION_MASK | GTMA_DOES_GO_IRREVOCABLE
+		    | GTMA_MAY_ENTER_IRREVOCABLE);
+      else
+	subcode &= GTMA_DECLARATION_MASK;
+      gimple_transaction_set_subcode (region->transaction_stmt, subcode);
+    }
+
+  return NULL;
+}
+
+// Propagate flags from inner transactions outwards.
+static void
+propagate_tm_flags_out (struct tm_region *region)
+{
+  if (region == NULL)
+    return;
+  propagate_tm_flags_out (region->inner);
+
+  if (region->outer && region->outer->transaction_stmt)
+    {
+      unsigned s = gimple_transaction_subcode (region->transaction_stmt);
+      s &= (GTMA_HAVE_ABORT | GTMA_HAVE_LOAD | GTMA_HAVE_STORE
+            | GTMA_MAY_ENTER_IRREVOCABLE);
+      s |= gimple_transaction_subcode (region->outer->transaction_stmt);
+      gimple_transaction_set_subcode (region->outer->transaction_stmt, s);
+    }
+
+  propagate_tm_flags_out (region->next);
+}
+
 /* Entry point to the MARK phase of TM expansion.  Here we replace
    transactional memory statements with calls to builtins, and function
    calls with their transactional clones (if available).  But we don't
@@ -2526,46 +2824,34 @@  compute_transaction_bits (void)
 static unsigned int
 execute_tm_mark (void)
 {
-  struct tm_region *region;
-  basic_block bb;
-  VEC (basic_block, heap) *queue;
-  size_t i;
-
-  queue = VEC_alloc (basic_block, heap, 10);
   pending_edge_inserts_p = false;
 
-  for (region = all_tm_regions; region ; region = region->next)
-    {
-      tm_log_init ();
-      /* If we have a transaction...  */
-      if (region->exit_blocks)
-	{
-	  unsigned int subcode
-	    = gimple_transaction_subcode (region->transaction_stmt);
+  expand_regions (all_tm_regions, generate_tm_state, NULL);
 
-	  /* Collect a new SUBCODE set, now that optimizations are done...  */
-	  if (subcode & GTMA_DOES_GO_IRREVOCABLE)
-	    subcode &= (GTMA_DECLARATION_MASK | GTMA_DOES_GO_IRREVOCABLE
-			| GTMA_MAY_ENTER_IRREVOCABLE);
-	  else
-	    subcode &= GTMA_DECLARATION_MASK;
-	  gimple_transaction_set_subcode (region->transaction_stmt, subcode);
-	}
+  tm_log_init ();
 
-      queue = get_tm_region_blocks (region->entry_block,
-				    region->exit_blocks,
-				    region->irr_blocks,
-				    NULL,
-				    /*stop_at_irr_p=*/true);
-      for (i = 0; VEC_iterate (basic_block, queue, i, bb); ++i)
-	expand_block_tm (region, bb);
-      VEC_free (basic_block, heap, queue);
+  VEC(tm_region_p, heap) *bb_regions = get_bb_regions_instrumented ();
+  struct tm_region *r;
+  unsigned i;
 
-      tm_log_emit ();
-    }
+  // Expand memory operations into calls into the runtime.
+  // This collects log entries as well.
+  FOR_EACH_VEC_ELT (tm_region_p, bb_regions, i, r)
+    if (r != NULL)
+      expand_block_tm (r, BASIC_BLOCK (i));
+
+  // Propagate flags from inner transactions outwards.
+  propagate_tm_flags_out (all_tm_regions);
+
+  // Expand GIMPLE_TRANSACTIONs into calls into the runtime.
+  expand_regions (all_tm_regions, expand_transaction, NULL);
+
+  tm_log_emit ();
+  tm_log_delete ();
 
   if (pending_edge_inserts_p)
     gsi_commit_edge_inserts ();
+  free_dominance_info (CDI_DOMINATORS);
   return 0;
 }
 
@@ -2590,23 +2876,33 @@  struct gimple_opt_pass pass_tm_mark =
  }
 };
 
-/* Create an abnormal call edge from BB to the first block of the region
-   represented by STATE.  Also record the edge in the TM_RESTART map.  */
+
+/* Create an abnormal edge from STMT at iter, splitting the block
+   as necessary.  Adjust *PNEXT as needed for the split block.  */
 
 static inline void
-make_tm_edge (gimple stmt, basic_block bb, struct tm_region *region)
+split_bb_make_tm_edge (gimple stmt, basic_block dest_bb,
+                       gimple_stmt_iterator iter, gimple_stmt_iterator *pnext)
 {
-  void **slot;
-  struct tm_restart_node *n, dummy;
+  basic_block bb = gimple_bb (stmt);
+  if (!gsi_one_before_end_p (iter))
+    {
+      edge e = split_block (bb, stmt);
+      *pnext = gsi_start_bb (e->dest);
+    }
+  make_edge (bb, dest_bb, EDGE_ABNORMAL);
 
+  // Record the need for the edge for the benefit of the rtl passes.
   if (cfun->gimple_df->tm_restart == NULL)
     cfun->gimple_df->tm_restart = htab_create_ggc (31, struct_ptr_hash,
 						   struct_ptr_eq, ggc_free);
 
+  struct tm_restart_node dummy;
   dummy.stmt = stmt;
-  dummy.label_or_list = gimple_block_label (region->entry_block);
-  slot = htab_find_slot (cfun->gimple_df->tm_restart, &dummy, INSERT);
-  n = (struct tm_restart_node *) *slot;
+  dummy.label_or_list = gimple_block_label (dest_bb);
+
+  void **slot = htab_find_slot (cfun->gimple_df->tm_restart, &dummy, INSERT);
+  struct tm_restart_node *n = (struct tm_restart_node *) *slot;
   if (n == NULL)
     {
       n = ggc_alloc_tm_restart_node ();
@@ -2616,216 +2912,86 @@  make_tm_edge (gimple stmt, basic_block bb, struct tm_region *region)
     {
       tree old = n->label_or_list;
       if (TREE_CODE (old) == LABEL_DECL)
-	old = tree_cons (NULL, old, NULL);
+        old = tree_cons (NULL, old, NULL);
       n->label_or_list = tree_cons (NULL, dummy.label_or_list, old);
     }
-
-  make_edge (bb, region->entry_block, EDGE_ABNORMAL);
 }
 
-
 /* Split block BB as necessary for every builtin function we added, and
    wire up the abnormal back edges implied by the transaction restart.  */
 
 static void
-expand_block_edges (struct tm_region *region, basic_block bb)
+expand_block_edges (struct tm_region *const region, basic_block bb)
 {
-  gimple_stmt_iterator gsi;
+  gimple_stmt_iterator gsi, next_gsi;
 
-  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi = next_gsi)
     {
-      bool do_next = true;
       gimple stmt = gsi_stmt (gsi);
 
-      /* ??? TM_COMMIT (and any other tm builtin function) in a nested
-	 transaction has an abnormal edge back to the outer-most transaction
-	 (there are no nested retries), while a TM_ABORT also has an abnormal
-	 backedge to the inner-most transaction.  We haven't actually saved
-	 the inner-most transaction here.  We should be able to get to it
-	 via the region_nr saved on STMT, and read the transaction_stmt from
-	 that, and find the first region block from there.  */
-      /* ??? Shouldn't we split for any non-pure, non-irrevocable function?  */
-      if (gimple_code (stmt) == GIMPLE_CALL
-	  && (gimple_call_flags (stmt) & ECF_TM_BUILTIN) != 0)
+      next_gsi = gsi;
+      gsi_next (&next_gsi);
+
+      // ??? Shouldn't we split for any non-pure, non-irrevocable function?
+      if (gimple_code (stmt) != GIMPLE_CALL
+	  || (gimple_call_flags (stmt) & ECF_TM_BUILTIN) == 0)
+	continue;
+
+      if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)) == BUILT_IN_TM_ABORT)
 	{
-	  if (gsi_one_before_end_p (gsi))
-	    make_tm_edge (stmt, bb, region);
-	  else
+	  // If we have a ``_transaction_cancel [[outer]]'', there is only
+	  // one abnormal edge: to the transaction marked OUTER.
+	  // All compiler-generated instances of BUILT_IN_TM_ABORT have a
+	  // constant argument, which we can examine here.  Users invoking
+	  // TM_ABORT directly get what they deserve.
+	  tree arg = gimple_call_arg (stmt, 0);
+	  if (TREE_CODE (arg) == INTEGER_CST
+	      && (TREE_INT_CST_LOW (arg) & AR_OUTERABORT) != 0
+	      && !decl_is_tm_clone (current_function_decl))
 	    {
-	      edge e = split_block (bb, stmt);
-	      make_tm_edge (stmt, bb, region);
-	      bb = e->dest;
-	      gsi = gsi_start_bb (bb);
-	      do_next = false;
+	      // Find the GTMA_IS_OUTER transaction.
+	      for (struct tm_region *o = region; o; o = o->outer)
+		if (o->original_transaction_was_outer)
+		  {
+		    split_bb_make_tm_edge (stmt, o->restart_block,
+					   gsi, &next_gsi);
+		    break;
+		  }
+
+	      // Otherwise, the front-end should have semantically checked
+	      // outer aborts, but in either case the target region is not
+	      // within this function.
+	      continue;
 	    }
 
-	  /* Delete any tail-call annotation that may have been added.
-	     The tail-call pass may have mis-identified the commit as being
-	     a candidate because we had not yet added this restart edge.  */
-	  gimple_call_set_tail (stmt, false);
+	  // Non-outer, TM aborts have an abnormal edge to the inner-most
+	  // transaction, the one being aborted;
+	  split_bb_make_tm_edge (stmt, region->restart_block, gsi, &next_gsi);
 	}
 
-      if (do_next)
-	gsi_next (&gsi);
-    }
-}
-
-/* Expand the GIMPLE_TRANSACTION statement into the STM library call.  */
-
-static void
-expand_transaction (struct tm_region *region)
-{
-  tree status, tm_start;
-  basic_block atomic_bb, slice_bb;
-  gimple_stmt_iterator gsi;
-  tree t1, t2;
-  gimple g;
-  int flags, subcode;
-
-  tm_start = builtin_decl_explicit (BUILT_IN_TM_START);
-  status = create_tmp_reg (TREE_TYPE (TREE_TYPE (tm_start)), "tm_state");
-
-  /* ??? There are plenty of bits here we're not computing.  */
-  subcode = gimple_transaction_subcode (region->transaction_stmt);
-  if (subcode & GTMA_DOES_GO_IRREVOCABLE)
-    flags = PR_DOESGOIRREVOCABLE | PR_UNINSTRUMENTEDCODE;
-  else
-    flags = PR_INSTRUMENTEDCODE;
-  if ((subcode & GTMA_MAY_ENTER_IRREVOCABLE) == 0)
-    flags |= PR_HASNOIRREVOCABLE;
-  /* If the transaction does not have an abort in lexical scope and is not
-     marked as an outer transaction, then it will never abort.  */
-  if ((subcode & GTMA_HAVE_ABORT) == 0
-      && (subcode & GTMA_IS_OUTER) == 0)
-    flags |= PR_HASNOABORT;
-  if ((subcode & GTMA_HAVE_STORE) == 0)
-    flags |= PR_READONLY;
-  t2 = build_int_cst (TREE_TYPE (status), flags);
-  g = gimple_build_call (tm_start, 1, t2);
-  gimple_call_set_lhs (g, status);
-  gimple_set_location (g, gimple_location (region->transaction_stmt));
-
-  atomic_bb = gimple_bb (region->transaction_stmt);
-
-  if (!VEC_empty (tree, tm_log_save_addresses))
-    tm_log_emit_saves (region->entry_block, atomic_bb);
-
-  gsi = gsi_last_bb (atomic_bb);
-  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
-  gsi_remove (&gsi, true);
-
-  if (!VEC_empty (tree, tm_log_save_addresses))
-    region->entry_block =
-      tm_log_emit_save_or_restores (region->entry_block,
-				    A_RESTORELIVEVARIABLES,
-				    status,
-				    tm_log_emit_restores,
-				    atomic_bb,
-				    FALLTHRU_EDGE (atomic_bb),
-				    &slice_bb);
-  else
-    slice_bb = atomic_bb;
-
-  /* If we have an ABORT statement, create a test following the start
-     call to perform the abort.  */
-  if (gimple_transaction_label (region->transaction_stmt))
-    {
-      edge e;
-      basic_block test_bb;
-
-      test_bb = create_empty_bb (slice_bb);
-      if (current_loops && slice_bb->loop_father)
-	add_bb_to_loop (test_bb, slice_bb->loop_father);
-      if (VEC_empty (tree, tm_log_save_addresses))
-	region->entry_block = test_bb;
-      gsi = gsi_last_bb (test_bb);
-
-      t1 = create_tmp_reg (TREE_TYPE (status), NULL);
-      t2 = build_int_cst (TREE_TYPE (status), A_ABORTTRANSACTION);
-      g = gimple_build_assign_with_ops (BIT_AND_EXPR, t1, status, t2);
-      gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
-
-      t2 = build_int_cst (TREE_TYPE (status), 0);
-      g = gimple_build_cond (NE_EXPR, t1, t2, NULL, NULL);
-      gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
-
-      e = FALLTHRU_EDGE (slice_bb);
-      redirect_edge_pred (e, test_bb);
-      e->flags = EDGE_FALSE_VALUE;
-      e->probability = PROB_ALWAYS - PROB_VERY_UNLIKELY;
-
-      e = BRANCH_EDGE (atomic_bb);
-      redirect_edge_pred (e, test_bb);
-      e->flags = EDGE_TRUE_VALUE;
-      e->probability = PROB_VERY_UNLIKELY;
-
-      e = make_edge (slice_bb, test_bb, EDGE_FALLTHRU);
-    }
-
-  /* If we've no abort, but we do have PHIs at the beginning of the atomic
-     region, that means we've a loop at the beginning of the atomic region
-     that shares the first block.  This can cause problems with the abnormal
-     edges we're about to add for the transaction restart.  Solve this by
-     adding a new empty block to receive the abnormal edges.  */
-  else if (phi_nodes (region->entry_block))
-    {
-      edge e;
-      basic_block empty_bb;
-
-      region->entry_block = empty_bb = create_empty_bb (atomic_bb);
-      if (current_loops && atomic_bb->loop_father)
-	add_bb_to_loop (empty_bb, atomic_bb->loop_father);
-
-      e = FALLTHRU_EDGE (atomic_bb);
-      redirect_edge_pred (e, empty_bb);
-
-      e = make_edge (atomic_bb, empty_bb, EDGE_FALLTHRU);
-    }
-
-  /* The GIMPLE_TRANSACTION statement no longer exists.  */
-  region->transaction_stmt = NULL;
-}
-
-static void expand_regions (struct tm_region *);
-
-/* Helper function for expand_regions.  Expand REGION and recurse to
-   the inner region.  */
-
-static void
-expand_regions_1 (struct tm_region *region)
-{
-  if (region->exit_blocks)
-    {
-      unsigned int i;
-      basic_block bb;
-      VEC (basic_block, heap) *queue;
+      // All TM builtins have an abnormal edge to the outer-most transaction.
+      // We never restart inner transactions.  For tm clones, we know a-priori
+      // that the outer-most transaction is outside the function.
+      if (decl_is_tm_clone (current_function_decl))
+	continue;
 
-      /* Collect the set of blocks in this region.  Do this before
-	 splitting edges, so that we don't have to play with the
-	 dominator tree in the middle.  */
-      queue = get_tm_region_blocks (region->entry_block,
-				    region->exit_blocks,
-				    region->irr_blocks,
-				    NULL,
-				    /*stop_at_irr_p=*/false);
-      expand_transaction (region);
-      for (i = 0; VEC_iterate (basic_block, queue, i, bb); ++i)
-	expand_block_edges (region, bb);
-      VEC_free (basic_block, heap, queue);
-    }
-  if (region->inner)
-    expand_regions (region->inner);
-}
+      if (cfun->gimple_df->tm_restart == NULL)
+	cfun->gimple_df->tm_restart
+	  = htab_create_ggc (31, struct_ptr_hash, struct_ptr_eq, ggc_free);
 
-/* Expand regions starting at REGION.  */
+      // All TM builtins have an abnormal edge to the outer-most transaction.
+      // We never restart inner transactions.
+      for (struct tm_region *o = region; o; o = o->outer)
+	if (!o->outer)
+	  {
+            split_bb_make_tm_edge (stmt, o->restart_block, gsi, &next_gsi);
+	    break;
+	  }
 
-static void
-expand_regions (struct tm_region *region)
-{
-  while (region)
-    {
-      expand_regions_1 (region);
-      region = region->next;
+      // Delete any tail-call annotation that may have been added.
+      // The tail-call pass may have mis-identified the commit as being
+      // a candidate because we had not yet added this restart edge.
+      gimple_call_set_tail (stmt, false);
     }
 }
 
@@ -2834,8 +3000,15 @@  expand_regions (struct tm_region *region)
 static unsigned int
 execute_tm_edges (void)
 {
-  expand_regions (all_tm_regions);
-  tm_log_delete ();
+  VEC(tm_region_p, heap) *bb_regions = get_bb_regions_instrumented ();
+  struct tm_region *r;
+  unsigned i;
+
+  FOR_EACH_VEC_ELT (tm_region_p, bb_regions, i, r)
+    if (r != NULL)
+      expand_block_edges (r, BASIC_BLOCK (i));
+
+  VEC_free (tm_region_p, heap, bb_regions);
 
   /* We've got to release the dominance info now, to indicate that it
      must be rebuilt completely.  Otherwise we'll crash trying to update
@@ -2868,6 +3041,54 @@  struct gimple_opt_pass pass_tm_edges =
  }
 };
 
+/* Helper function for expand_regions.  Expand REGION and recurse to
+   the inner region.  Call CALLBACK on each region.  CALLBACK returns
+   NULL to continue the traversal, otherwise a non-null value which
+   this function will return as well.  */
+
+static void *
+expand_regions_1 (struct tm_region *region,
+		  void *(*callback)(struct tm_region *, void *),
+		  void *data)
+{
+  void *retval = NULL;
+  if (region->exit_blocks)
+    {
+      retval = callback (region, data);
+      if (retval)
+	return retval;
+    }
+  if (region->inner)
+    {
+      retval = expand_regions (region->inner, callback, data);
+      if (retval)
+	return retval;
+    }
+  return retval;
+}
+
+/* Traverse the regions enclosed and including REGION.  Execute
+   CALLBACK for each region, passing DATA.  CALLBACK returns NULL to
+   continue the traversal, otherwise a non-null value which this
+   function will return as well.  */
+
+static void *
+expand_regions (struct tm_region *region,
+		void *(*callback)(struct tm_region *, void *),
+		void *data)
+{
+  void *retval = NULL;
+  while (region)
+    {
+      retval = expand_regions_1 (region, callback, data);
+      if (retval)
+	return retval;
+      region = region->next;
+    }
+  return retval;
+}
+
+
 /* A unique TM memory operation.  */
 typedef struct tm_memop
 {
@@ -3657,6 +3878,35 @@  maybe_push_queue (struct cgraph_node *node,
     }
 }
 
+/* Duplicate the basic blocks in QUEUE for use in the uninstrumented
+   code path.  QUEUE are the basic blocks inside the transaction
+   represented in REGION.
+
+   Later in split_code_paths() we will add the conditional to choose
+   between the two alternatives.  */
+
+static void
+ipa_uninstrument_transaction (struct tm_region *region,
+			      VEC (basic_block, heap) *queue)
+{
+  gimple transaction = region->transaction_stmt;
+  basic_block transaction_bb = gimple_bb (transaction);
+  int n = VEC_length (basic_block, queue);
+  basic_block *new_bbs = XNEWVEC (basic_block, n);
+
+  copy_bbs (VEC_address (basic_block, queue), n, new_bbs,
+	    NULL, 0, NULL, NULL, transaction_bb);
+  edge e = make_edge (transaction_bb, new_bbs[0], EDGE_TM_UNINSTRUMENTED);
+  add_phi_args_after_copy (new_bbs, n, e);
+
+  // Now we will have a GIMPLE_ATOMIC with 3 possible edges out of it.
+  //   a) EDGE_FALLTHRU into the transaction
+  //   b) EDGE_TM_ABORT out of the transaction
+  //   c) EDGE_TM_UNINSTRUMENTED into the uninstrumented blocks.
+
+  free (new_bbs);
+}
+
 /* A subroutine of ipa_tm_scan_calls_transaction and ipa_tm_scan_calls_clone.
    Queue all callees within block BB.  */
 
@@ -3718,11 +3968,29 @@  ipa_tm_scan_calls_transaction (struct tm_ipa_cg_data *d,
       bbs = get_tm_region_blocks (r->entry_block, r->exit_blocks, NULL,
 				  d->transaction_blocks_normal, false);
 
+      // Generate the uninstrumented code path for this transaction.
+      ipa_uninstrument_transaction (r, bbs);
+
       FOR_EACH_VEC_ELT (basic_block, bbs, i, bb)
 	ipa_tm_scan_calls_block (callees_p, bb, false);
 
       VEC_free (basic_block, heap, bbs);
     }
+
+  // ??? copy_bbs should maintain cgraph edges for the blocks as it is
+  // copying them, rather than forcing us to do this externally.
+  rebuild_cgraph_edges ();
+
+  // ??? In ipa_uninstrument_transaction we don't try to update dominators
+  // because copy_bbs doesn't return a VEC like iterate_fix_dominators expects.
+  // Instead, just release dominators here so update_ssa recomputes them.
+  free_dominance_info (CDI_DOMINATORS);
+
+  // When building the uninstrumented code path, copy_bbs will have invoked
+  // create_new_def_for starting an "ssa update context".  There is only one
+  // instance of this context, so resolve ssa updates before moving on to
+  // the next function.
+  update_ssa (TODO_update_ssa);
 }
 
 /* Scan all calls in NODE as if this is the transactional clone,
@@ -4819,6 +5087,7 @@  ipa_tm_execute (void)
 #endif
 
   bitmap_obstack_initialize (&tm_obstack);
+  initialize_original_copy_tables ();
 
   /* For all local functions marked tm_callable, queue them.  */
   FOR_EACH_DEFINED_FUNCTION (node)
@@ -4852,7 +5121,8 @@  ipa_tm_execute (void)
 	  {
 	    d = get_cg_data (&node, true);
 
-	    /* Scan for calls that are in each transaction.  */
+	    /* Scan for calls that are in each transaction, and
+	       generate the uninstrumented code path.  */
 	    ipa_tm_scan_calls_transaction (d, &tm_callees);
 
 	    /* Put it in the worklist so we can scan the function
@@ -5057,6 +5327,7 @@  ipa_tm_execute (void)
   VEC_free (cgraph_node_p, heap, tm_callees);
   VEC_free (cgraph_node_p, heap, irr_worklist);
   bitmap_obstack_release (&tm_obstack);
+  free_original_copy_tables ();
 
   FOR_EACH_FUNCTION (node)
     node->symbol.aux = NULL;
@@ -5084,7 +5355,7 @@  struct simple_ipa_opt_pass pass_ipa_tm =
   0,			                /* properties_provided */
   0,					/* properties_destroyed */
   0,					/* todo_flags_start */
-  0,             			/* todo_flags_finish */
+  TODO_update_ssa,      		/* todo_flags_finish */
  },
 };
 
diff --git a/gcc/trans-mem.h b/gcc/trans-mem.h
index 95e9e7e..58ad2a2 100644
--- a/gcc/trans-mem.h
+++ b/gcc/trans-mem.h
@@ -21,6 +21,7 @@ 
 /* These defines must match the enumerations in libitm.h.  */
 #define PR_INSTRUMENTEDCODE	0x0001
 #define PR_UNINSTRUMENTEDCODE	0x0002
+#define PR_MULTIWAYCODE		(PR_INSTRUMENTEDCODE | PR_UNINSTRUMENTEDCODE)
 #define PR_HASNOXMMUPDATE	0x0004
 #define PR_HASNOABORT		0x0008
 #define PR_HASNOIRREVOCABLE	0x0020
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 183b9b9..5f74646 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -666,7 +666,7 @@  make_edges (void)
 	      {
 		tree abort_label = gimple_transaction_label (last);
 		if (abort_label)
-		  make_edge (bb, label_to_block (abort_label), 0);
+		  make_edge (bb, label_to_block (abort_label), EDGE_TM_ABORT);
 		fallthru = true;
 	      }
 	      break;
@@ -2062,7 +2062,7 @@  gimple_debug_bb_n (int n)
 /* Dump the CFG on stderr.
 
    FLAGS are the same used by the tree dumping functions
-   (see TDF_* in tree-pass.h).  */
+   (see TDF_* in dumpfile.h).  */
 
 void
 gimple_debug_cfg (int flags)
@@ -6737,7 +6737,7 @@  move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb,
 }
 
 
-/* Dump FUNCTION_DECL FN to file FILE using FLAGS (see TDF_* in tree-pass.h)
+/* Dump FUNCTION_DECL FN to file FILE using FLAGS (see TDF_* in dumpfile.h)
    */
 
 void
-- 
1.7.11.7