diff --git a/libitm/beginend.cc b/libitm/beginend.cc
index e6a84de..4369946 100644
--- a/libitm/beginend.cc
+++ b/libitm/beginend.cc
@@ -54,6 +54,8 @@ static pthread_mutex_t global_tid_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_key_t thr_release_key;
 static pthread_once_t thr_release_once = PTHREAD_ONCE_INIT;
 
+// See gtm_thread::begin_transaction.
+uint32_t GTM::htm_fastpath = 0;
 
 /* Allocate a transaction structure.  */
 void *
@@ -163,6 +165,70 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
   if (unlikely(prop & pr_undoLogCode))
     GTM_fatal("pr_undoLogCode not supported");
 
+#if defined(USE_HTM_FASTPATH) && !defined(HTM_CUSTOM_FASTPATH)
+  // HTM fastpath.  Only chosen in the absence of transaction_cancel to allow
+  // using an uninstrumented code path.
+  // The fastpath is enabled only by dispatch_htm's method group, which uses
+  // serial-mode methods as fallback.  Serial-mode transactions cannot execute
+  // concurrently with HW transactions because the latter monitor the serial
+  // lock's writer flag and thus abort if another thread is or becomes a
+  // serial transaction.  Therefore, if the fastpath is enabled, then a
+  // transaction is not executing as a HW transaction iff the serial lock is
+  // write-locked.  This allows us to use htm_fastpath and the serial lock's
+  // writer flag to reliable determine whether the current thread runs a HW
+  // transaction, and thus we do not need to maintain this information in
+  // per-thread state.
+  // If an uninstrumented code path is not available, we can still run
+  // instrumented code from a HW transaction because the HTM fastpath kicks
+  // in early in both begin and commit, and the transaction is not canceled.
+  // HW transactions might get requests to switch to serial-irrevocable mode,
+  // but these can be ignored because the HTM provides all necessary
+  // correctness guarantees.  Transactions cannot detect whether they are
+  // indeed in serial mode, and HW transactions should never need serial mode
+  // for any internal changes (e.g., they never abort visibly to the STM code
+  // and thus do not trigger the standard retry handling).
+  if (likely(htm_fastpath && (prop & pr_hasNoAbort)))
+    {
+      for (uint32_t t = htm_fastpath; t; t--)
+	{
+	  uint32_t ret = htm_begin();
+	  if (htm_begin_success(ret))
+	    {
+	      // We are executing a transaction now.
+	      // Monitor the writer flag in the serial-mode lock, and abort
+	      // if there is an active or waiting serial-mode transaction.
+	      if (unlikely(serial_lock.is_write_locked()))
+		htm_abort();
+	      else
+		// We do not need to set a_saveLiveVariables because of HTM.
+		return (prop & pr_uninstrumentedCode) ?
+		    a_runUninstrumentedCode : a_runInstrumentedCode;
+	    }
+	  // The transaction has aborted.  Don't retry if it's unlikely that
+	  // retrying the transaction will be successful.
+	  if (!htm_abort_should_retry(ret))
+	    break;
+	  // Wait until any concurrent serial-mode transactions have finished.
+	  // This is an empty critical section, but won't be elided.
+	  if (serial_lock.is_write_locked())
+	    {
+	      tx = gtm_thr();
+	      if (unlikely(tx == NULL))
+	        {
+	          // See below.
+	          tx = new gtm_thread();
+	          set_gtm_thr(tx);
+	        }
+	      serial_lock.read_lock(tx);
+	      serial_lock.read_unlock(tx);
+	      // TODO We should probably reset the retry count t here, unless
+	      // we have retried so often that we should go serial to avoid
+	      // starvation.
+	    }
+	}
+    }
+#endif
+
   tx = gtm_thr();
   if (unlikely(tx == NULL))
     {
@@ -537,6 +603,17 @@ GTM::gtm_thread::restart (gtm_restart_reason r, bool finish_serial_upgrade)
 void ITM_REGPARM
 _ITM_commitTransaction(void)
 {
+#if defined(USE_HTM_FASTPATH)
+  // HTM fastpath.  If we are not executing a HW transaction, then we will be
+  // a serial-mode transaction.  If we are, then there will be no other
+  // concurrent serial-mode transaction.
+  // See gtm_thread::begin_transaction.
+  if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
+    {
+      htm_commit();
+      return;
+    }
+#endif
   gtm_thread *tx = gtm_thr();
   if (!tx->trycommit ())
     tx->restart (RESTART_VALIDATE_COMMIT);
@@ -545,6 +622,14 @@ _ITM_commitTransaction(void)
 void ITM_REGPARM
 _ITM_commitTransactionEH(void *exc_ptr)
 {
+#if defined(USE_HTM_FASTPATH)
+  // See _ITM_commitTransaction.
+  if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
+    {
+      htm_commit();
+      return;
+    }
+#endif
   gtm_thread *tx = gtm_thr();
   if (!tx->trycommit ())
     {
diff --git a/libitm/config/linux/rwlock.h b/libitm/config/linux/rwlock.h
index 987e580..f13d287 100644
--- a/libitm/config/linux/rwlock.h
+++ b/libitm/config/linux/rwlock.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
    Contributed by Torvald Riegel <triegel@redhat.com>.
 
    This file is part of the GNU Transactional Memory Library (libitm).
@@ -59,6 +59,14 @@ class gtm_rwlock
   bool write_upgrade (gtm_thread *tx);
   void write_upgrade_finish (gtm_thread *tx);
 
+  // Returns true iff there is a concurrent active or waiting writer.
+  // This is primarily useful for simple HyTM approaches, and the value being
+  // checked is loaded with memory_order_relaxed.
+  bool is_write_locked()
+  {
+    return writers.load (memory_order_relaxed) != 0;
+  }
+
  protected:
   bool write_lock_generic (gtm_thread *tx);
 };
diff --git a/libitm/config/posix/rwlock.h b/libitm/config/posix/rwlock.h
index a1a6042..79f1429 100644
--- a/libitm/config/posix/rwlock.h
+++ b/libitm/config/posix/rwlock.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2009, 2011, 2012 Free Software Foundation, Inc.
    Contributed by Richard Henderson <rth@redhat.com>.
 
    This file is part of the GNU Transactional Memory Library (libitm).
@@ -74,6 +74,14 @@ class gtm_rwlock
   bool write_upgrade (gtm_thread *tx);
   void write_upgrade_finish (gtm_thread *tx);
 
+  // Returns true iff there is a concurrent active or waiting writer.
+  // This is primarily useful for simple HyTM approaches, and the value being
+  // checked is loaded with memory_order_relaxed.
+  bool is_write_locked()
+  {
+    return summary.load (memory_order_relaxed) & (a_writer | w_writer);
+  }
+
  protected:
   bool write_lock_generic (gtm_thread *tx);
 };
diff --git a/libitm/config/x86/target.h b/libitm/config/x86/target.h
index 74f4f92..41ae2eb 100644
--- a/libitm/config/x86/target.h
+++ b/libitm/config/x86/target.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
    Contributed by Richard Henderson <rth@redhat.com>.
 
    This file is part of the GNU Transactional Memory Library (libitm).
@@ -22,6 +22,22 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+// We'll be using some of the cpu builtins, and their associated types.
+#ifndef __cplusplus
+/* ??? It's broken for C++. */
+#include <x86intrin.h>
+#else
+# ifdef __SSE2__
+#  include <emmintrin.h>
+# elif defined(__SSE__)
+#  include <xmmintrin.h>
+# endif
+# if defined(__AVX__) || defined(__RTM__)
+#  include <immintrin.h>
+# endif
+#endif
+#include <cpuid.h>
+
 namespace GTM HIDDEN {
 
 /* ??? This doesn't work for Win64.  */
@@ -62,19 +78,66 @@ cpu_relax (void)
   __builtin_ia32_pause ();
 }
 
-} // namespace GTM
+// Use Intel RTM if supported by the assembler.
+// See gtm_thread::begin_transaction for how these functions are used.
+#ifdef HAVE_AS_RTM
+#define USE_HTM_FASTPATH
 
-// We'll be using some of the cpu builtins, and their associated types.
-#ifndef __cplusplus
-/* ??? It's broken for C++. */
-#include <x86intrin.h>
-#else
-# ifdef __SSE2__
-#  include <emmintrin.h>
-# elif defined(__SSE__)
-#  include <xmmintrin.h>
-# endif
-# ifdef __AVX__
-#  include <immintrin.h>
-# endif
+static inline bool
+htm_available ()
+{
+  const unsigned cpuid_rtm = (1 << 11);
+  if (__get_cpuid_max (0, NULL) >= 7)
+    {
+      unsigned a, b, c, d;
+      __cpuid_count (7, 0, a, b, c, d);
+      if (b & cpuid_rtm)
+	return true;
+    }
+  return false;
+}
+
+static inline uint32_t
+htm_init ()
+{
+  // Maximum number of times we try to execute a transaction as a HW
+  // transaction.
+  // ??? Why 2?  Any offline or runtime tuning necessary?
+  return htm_available () ? 2 : 0;
+}
+
+static inline uint32_t
+htm_begin ()
+{
+  return _xbegin();
+}
+
+static inline bool
+htm_begin_success (uint32_t begin_ret)
+{
+  return begin_ret == _XBEGIN_STARTED;
+}
+
+static inline void
+htm_commit ()
+{
+  _xend();
+}
+
+static inline void
+htm_abort ()
+{
+  // ??? According to a yet unpublished ABI rule, 0xff is reserved and
+  // supposed to signal a busy lock.  Source: andi.kleen@intel.com
+  _xabort(0xff);
+}
+
+static inline bool
+htm_abort_should_retry (uint32_t begin_ret)
+{
+  return begin_ret & _XABORT_RETRY;
+}
 #endif
+
+
+} // namespace GTM
diff --git a/libitm/configure.tgt b/libitm/configure.tgt
index d6828e8..74cb0b5 100644
--- a/libitm/configure.tgt
+++ b/libitm/configure.tgt
@@ -61,6 +61,7 @@ case "${target_cpu}" in
 	      XCFLAGS="${XCFLAGS} -fomit-frame-pointer"
 	    fi
 	esac
+	XCFLAGS="${XCFLAGS} -mrtm"
 	ARCH=x86
 	;;
 
@@ -101,6 +102,7 @@ case "${target_cpu}" in
 	    XCFLAGS="${XCFLAGS} -fomit-frame-pointer"
 	    ;;
 	esac
+	XCFLAGS="${XCFLAGS} -mrtm"
 	ARCH=x86
 	;;
 
diff --git a/libitm/libitm_i.h b/libitm/libitm_i.h
index e826abd..4dfcda9 100644
--- a/libitm/libitm_i.h
+++ b/libitm/libitm_i.h
@@ -332,9 +332,14 @@ extern abi_dispatch *dispatch_serialirr();
 extern abi_dispatch *dispatch_serialirr_onwrite();
 extern abi_dispatch *dispatch_gl_wt();
 extern abi_dispatch *dispatch_ml_wt();
+extern abi_dispatch *dispatch_htm();
 
 extern gtm_cacheline_mask gtm_mask_stack(gtm_cacheline *, gtm_cacheline_mask);
 
+// Control variable for the HTM fastpath that uses serial mode as fallback.
+// Non-zero if the HTM fastpath is enabled. See gtm_thread::begin_transaction.
+extern uint32_t htm_fastpath;
+
 } // namespace GTM
 
 #endif // LIBITM_I_H
diff --git a/libitm/method-serial.cc b/libitm/method-serial.cc
index 09cfdd4..38857dc 100644
--- a/libitm/method-serial.cc
+++ b/libitm/method-serial.cc
@@ -212,11 +212,46 @@ class serialirr_onwrite_dispatch : public serialirr_dispatch
   }
 };
 
+// This group is pure HTM with serial mode as a fallback.  There is no
+// difference to serial_mg except that we need to enable or disable the HTM
+// fastpath.  See gtm_thread::begin_transaction.
+struct htm_mg : public method_group
+{
+  virtual void init()
+  {
+    // Enable the HTM fastpath if the HW is available.  The fastpath is
+    // initially disabled.
+#ifdef USE_HTM_FASTPATH
+    htm_fastpath = htm_init();
+#endif
+  }
+  virtual void fini()
+  {
+    // Disable the HTM fastpath.
+    htm_fastpath = 0;
+  }
+};
+
+static htm_mg o_htm_mg;
+
+// We just need the subclass to associate it with the HTM method group that
+// sets up the HTM fast path.  This will use serial_dispatch as fallback for
+// transactions that might get canceled; it has a different method group, but
+// this is harmless for serial dispatchs because they never abort.
+class htm_dispatch : public serialirr_dispatch
+{
+ public:
+  htm_dispatch() : serialirr_dispatch(false, true, false, false,
+      gtm_thread::STATE_SERIAL | gtm_thread::STATE_IRREVOCABLE, &o_htm_mg)
+  { }
+};
+
 } // anon namespace
 
 static const serialirr_dispatch o_serialirr_dispatch;
 static const serial_dispatch o_serial_dispatch;
 static const serialirr_onwrite_dispatch o_serialirr_onwrite_dispatch;
+static const htm_dispatch o_htm_dispatch;
 
 abi_dispatch *
 GTM::dispatch_serialirr ()
@@ -237,6 +272,12 @@ GTM::dispatch_serialirr_onwrite ()
       const_cast<serialirr_onwrite_dispatch *>(&o_serialirr_onwrite_dispatch);
 }
 
+abi_dispatch *
+GTM::dispatch_htm ()
+{
+  return const_cast<htm_dispatch *>(&o_htm_dispatch);
+}
+
 // Put the transaction into serial-irrevocable mode.
 
 void
@@ -244,6 +285,13 @@ GTM::gtm_thread::serialirr_mode ()
 {
   struct abi_dispatch *disp = abi_disp ();
 
+#if defined(USE_HTM_FASTPATH)
+  // HTM fastpath.  If we are executing a HW transaction, don't go serial but
+  // continue.  See gtm_thread::begin_transaction.
+  if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
+    return;
+#endif
+
   if (this->state & STATE_SERIAL)
     {
       if (this->state & STATE_IRREVOCABLE)
diff --git a/libitm/retry.cc b/libitm/retry.cc
index 172419b..bb7a1f5 100644
--- a/libitm/retry.cc
+++ b/libitm/retry.cc
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
    Contributed by Richard Henderson <rth@redhat.com>.
 
    This file is part of the GNU Transactional Memory Library (libitm).
@@ -254,6 +254,11 @@ parse_default_method()
       disp = GTM::dispatch_ml_wt();
       env += 5;
     }
+  else if (strncmp(env, "htm", 3) == 0)
+    {
+      disp = GTM::dispatch_htm();
+      env += 3;
+    }
   else
     goto unknown;
 
@@ -311,7 +316,15 @@ GTM::gtm_thread::number_of_threads_changed(unsigned previous, unsigned now)
 	set_default_dispatch(default_dispatch_user);
       else
 	{
-	  abi_dispatch* a = dispatch_ml_wt();
+	  // If HTM is available, use it by default with serial mode as
+	  // fallback.  Otherwise, use ml_wt because it probably scales best.
+	  abi_dispatch* a;
+#ifdef USE_HTM_FASTPATH
+	  if (htm_available())
+	    a = dispatch_htm();
+	  else
+#endif
+	    a = dispatch_ml_wt();
 	  if (a->supports(now))
 	    set_default_dispatch(a);
 	  else
