Patchwork 6/n: trans-mem: runtime

login
register
mail settings
Submitter Aldy Hernandez
Date Nov. 3, 2011, 5:54 p.m.
Message ID <4EB2D539.10505@redhat.com>
Download mbox | patch
Permalink /patch/123478/
State New
Headers show

Comments

Aldy Hernandez - Nov. 3, 2011, 5:54 p.m.
Index: libitm/config/x86/unaligned.h
===================================================================
--- libitm/config/x86/unaligned.h	(.../trunk)	(revision 0)
+++ libitm/config/x86/unaligned.h	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,237 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef LIBITM_X86_UNALIGNED_H
+#define LIBITM_X86_UNALIGNED_H 1
+
+#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1
+#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1
+
+#include "config/generic/unaligned.h"
+
+namespace GTM HIDDEN {
+
+template<>
+inline uint32_t
+unaligned_load2<uint32_t>(const gtm_cacheline *c1,
+			  const gtm_cacheline *c2, size_t ofs)
+{
+  uint32_t r, lo, hi;
+  lo = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
+  hi = c2->u32[0];
+  asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo));
+  return r;
+}
+
+template<>
+inline uint64_t
+unaligned_load2<uint64_t>(const gtm_cacheline *c1,
+			  const gtm_cacheline *c2, size_t ofs)
+{
+#ifdef __x86_64__
+  uint64_t r, lo, hi;
+  lo = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
+  hi = c2->u64[0];
+  asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 7) * 8), "0"(lo));
+  return r;
+#else
+  uint32_t v0, v1, v2;
+  uint64_t r;
+
+  if (ofs < CACHELINE_SIZE - 4)
+    {
+      v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 2];
+      v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
+      v2 = c2->u32[0];
+    }
+  else
+    {
+      v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
+      v1 = c2->u32[0];
+      v2 = c2->u32[1];
+    }
+  ofs = (ofs & 3) * 8;
+  asm("shrd %%cl, %[v1], %[v0]; shrd %%cl, %[v2], %[v1]"
+      : "=A"(r) : "c"(ofs), [v0] "a"(v0), [v1] "d"(v1), [v2] "r"(v2));
+
+  return r;
+#endif
+}
+
+#if defined(__SSE2__) || defined(__MMX__)
+template<>
+inline _ITM_TYPE_M64
+unaligned_load2<_ITM_TYPE_M64>(const gtm_cacheline *c1,
+			       const gtm_cacheline *c2, size_t ofs)
+{
+# ifdef __x86_64__
+  __m128i lo = _mm_movpi64_epi64 (c1->m64[CACHELINE_SIZE / 8 - 1]);
+  __m128i hi = _mm_movpi64_epi64 (c2->m64[0]);
+
+  ofs = (ofs & 7) * 8;
+  lo = _mm_srli_epi64 (lo, ofs);
+  hi = _mm_slli_epi64 (hi, 64 - ofs);
+  lo = lo | hi;
+  return _mm_movepi64_pi64 (lo);
+# else
+  // On 32-bit we're about to return the result in an MMX register, so go
+  // ahead and do the computation in that unit, even if SSE2 is available.
+  __m64 lo = c1->m64[CACHELINE_SIZE / 8 - 1];
+  __m64 hi = c2->m64[0];
+
+  ofs = (ofs & 7) * 8;
+  lo = _mm_srli_si64 (lo, ofs);
+  hi = _mm_slli_si64 (hi, 64 - ofs);
+  return lo | hi;
+# endif
+}
+#endif // SSE2 or MMX
+
+// The SSE types are strictly aligned.
+#ifdef __SSE__
+template<>
+  struct strict_alignment<_ITM_TYPE_M128>
+    : public std::true_type
+  { };
+
+// Expand the unaligned SSE move instructions.
+template<>
+inline _ITM_TYPE_M128
+unaligned_load<_ITM_TYPE_M128>(const void *t)
+{
+  return _mm_loadu_ps (static_cast<const float *>(t));
+}
+
+template<>
+inline void
+unaligned_store<_ITM_TYPE_M128>(void *t, _ITM_TYPE_M128 val)
+{
+  _mm_storeu_ps (static_cast<float *>(t), val);
+}
+#endif // SSE
+
+#ifdef __AVX__
+// The AVX types are strictly aligned when it comes to vmovaps vs vmovups.
+template<>
+  struct strict_alignment<_ITM_TYPE_M256>
+    : public std::true_type
+  { };
+
+template<>
+inline _ITM_TYPE_M256
+unaligned_load<_ITM_TYPE_M256>(const void *t)
+{
+  return _mm256_loadu_ps (static_cast<const float *>(t));
+}
+
+template<>
+inline void
+unaligned_store<_ITM_TYPE_M256>(void *t, _ITM_TYPE_M256 val)
+{
+  _mm256_storeu_ps (static_cast<float *>(t), val);
+}
+#endif // AVX
+
+#ifdef __XOP__
+# define HAVE_ARCH_REALIGN_M128I 1
+extern const __v16qi GTM_vpperm_shift[16];
+inline __m128i
+realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
+{
+  return _mm_perm_epi8 (lo, hi, GTM_vpperm_shift[byte_count]);
+}
+#elif defined(__AVX__)
+# define HAVE_ARCH_REALIGN_M128I 1
+extern "C" const uint64_t GTM_vpalignr_table[16];
+inline __m128i
+realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
+{
+  register __m128i xmm0 __asm__("xmm0") = hi;
+  register __m128i xmm1 __asm__("xmm1") = lo;
+  __asm("call *%2" : "+x"(xmm0) : "x"(xmm1),
+	"r"(&GTM_vpalignr_table[byte_count]));
+  return xmm0;
+}
+#elif defined(__SSSE3__)
+# define HAVE_ARCH_REALIGN_M128I 1
+extern "C" const uint64_t GTM_palignr_table[16];
+inline __m128i
+realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
+{
+  register __m128i xmm0 __asm__("xmm0") = hi;
+  register __m128i xmm1 __asm__("xmm1") = lo;
+  __asm("call *%2" : "+x"(xmm0) : "x"(xmm1),
+	"r"(&GTM_palignr_table[byte_count]));
+  return xmm0;
+}
+#elif defined(__SSE2__)
+# define HAVE_ARCH_REALIGN_M128I 1
+extern "C" const char GTM_pshift_table[16 * 16];
+inline __m128i
+realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
+{
+  register __m128i xmm0 __asm__("xmm0") = lo;
+  register __m128i xmm1 __asm__("xmm1") = hi;
+  __asm("call *%2" : "+x"(xmm0), "+x"(xmm1)
+	: "r"(GTM_pshift_table + byte_count*16));
+  return xmm0;
+}
+#endif // XOP, AVX, SSSE3, SSE2
+
+#ifdef HAVE_ARCH_REALIGN_M128I
+template<>
+inline _ITM_TYPE_M128
+unaligned_load2<_ITM_TYPE_M128>(const gtm_cacheline *c1,
+				const gtm_cacheline *c2, size_t ofs)
+{
+  return (_ITM_TYPE_M128)
+    realign_m128i (c1->m128i[CACHELINE_SIZE / 16 - 1],
+		   c2->m128i[0], ofs & 15);
+}
+#endif // HAVE_ARCH_REALIGN_M128I
+
+#ifdef __AVX__
+template<>
+inline _ITM_TYPE_M256
+unaligned_load2<_ITM_TYPE_M256>(const gtm_cacheline *c1,
+				const gtm_cacheline *c2, size_t ofs)
+{
+  __m128i v0, v1;
+  __m256i r;
+
+  v0 = (__m128i) unaligned_load2<_ITM_TYPE_M128>(c1, c2, ofs);
+  if (ofs < CACHELINE_SIZE - 16)
+    v1 = v0, v0 = _mm_loadu_si128 ((const __m128i *) &c1->b[ofs]);
+  else
+    v1 = _mm_loadu_si128((const __m128i *)&c2->b[ofs + 16 - CACHELINE_SIZE]);
+
+  r = _mm256_castsi128_si256 ((__m128i)v0);
+  r = _mm256_insertf128_si256 (r, (__m128i)v1, 1);
+  return (_ITM_TYPE_M256) r;
+}
+#endif // AVX
+
+} // namespace GTM
+
+#endif // LIBITM_X86_UNALIGNED_H
Index: libitm/config/generic/cacheline.cc
===================================================================
--- libitm/config/generic/cacheline.cc	(.../trunk)	(revision 0)
+++ libitm/config/generic/cacheline.cc	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,49 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "libitm_i.h"
+
+
+namespace GTM HIDDEN {
+
+void
+gtm_cacheline::copy_mask (gtm_cacheline * __restrict d,
+			  const gtm_cacheline * __restrict s,
+			  gtm_cacheline_mask m)
+{
+  const size_t n = sizeof (gtm_word);
+
+  if (m == (gtm_cacheline_mask) -1)
+    {
+      *d = *s;
+      return;
+    }
+  if (__builtin_expect (m == 0, 0))
+    return;
+
+  for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n)
+    store_mask (&d->w[i], s->w[i], m);
+}
+
+} // namespace GTM
Index: libitm/config/generic/cacheline.h
===================================================================
--- libitm/config/generic/cacheline.h	(.../trunk)	(revision 0)
+++ libitm/config/generic/cacheline.h	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,107 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef LIBITM_CACHELINE_H
+#define LIBITM_CACHELINE_H 1
+
+namespace GTM HIDDEN {
+
+// A cacheline is the smallest unit with which locks are associated.
+// The current implementation of the _ITM_[RW] barriers assumes that
+// all data types can fit (aligned) within a cachline, which means
+// in practice sizeof(complex long double) is the smallest cacheline size.
+// It ought to be small enough for efficient manipulation of the
+// modification mask, below.
+#ifndef CACHELINE_SIZE
+# define CACHELINE_SIZE 32
+#endif
+
+// A gtm_cacheline_mask stores a modified bit for every modified byte
+// in the cacheline with which it is associated.
+typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask;
+
+union gtm_cacheline
+{
+  // Byte access to the cacheline.
+  unsigned char b[CACHELINE_SIZE] __attribute__((aligned(CACHELINE_SIZE)));
+
+  // Larger sized access to the cacheline.
+  uint16_t u16[CACHELINE_SIZE / sizeof(uint16_t)];
+  uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)];
+  uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)];
+  gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)];
+
+  // Store S into D, but only the bytes specified by M.
+  template<typename T> static void store_mask (T *d, T s, uint8_t m);
+
+  // Copy S to D, but only the bytes specified by M.
+  static void copy_mask (gtm_cacheline * __restrict d,
+			 const gtm_cacheline * __restrict s,
+			 gtm_cacheline_mask m);
+
+  // A write barrier to emit after (a series of) copy_mask.
+  // When we're emitting non-temporal stores, the normal strong
+  // ordering of the machine doesn't apply.
+  static void copy_mask_wb () { atomic_write_barrier(); }
+};
+
+template<typename T>
+inline void
+gtm_cacheline::store_mask (T *d, T s, uint8_t m)
+{
+  const uint8_t tm = (1 << sizeof(T)) - 1;
+
+  if (__builtin_expect (m & tm, tm))
+    {
+      if (__builtin_expect ((m & tm) == tm, 1))
+	*d = s;
+      else
+        {
+	  const int half = sizeof(T) / 2;
+	  typedef typename sized_integral<half>::type half_t;
+	  half_t *dhalf = reinterpret_cast<half_t *>(d);
+	  half_t s1, s2;
+
+	  if (WORDS_BIGENDIAN)
+	    s1 = s >> half*8, s2 = s;
+	  else
+	    s1 = s, s2 = s >> half*8;
+
+	  store_mask (dhalf, s1, m);
+	  store_mask (dhalf + 1, s2, m >> half);
+	}
+    }
+}
+
+template<>
+inline void ALWAYS_INLINE
+gtm_cacheline::store_mask<uint8_t> (uint8_t *d, uint8_t s, uint8_t m)
+{
+  if (m & 1)
+    *d = s;
+}
+
+} // namespace GTM
+
+#endif // LIBITM_CACHELINE_H
Index: libitm/config/generic/cachepage.h
===================================================================
--- libitm/config/generic/cachepage.h	(.../trunk)	(revision 0)
+++ libitm/config/generic/cachepage.h	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,77 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef LIBITM_CACHEPAGE_H
+#define LIBITM_CACHEPAGE_H 1
+
+namespace GTM HIDDEN {
+
+// A "page" worth of saved cachelines plus modification masks.  This
+// arrangement is intended to minimize the overhead of alignment.  The
+// PAGE_SIZE defined by the target must be a constant for this to work,
+// which means that this definition may not be the same as the real
+// system page size.  An additional define of FIXED_PAGE_SIZE by the
+// target indicates that PAGE_SIZE exactly matches the system page size.
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+struct gtm_cacheline_page
+{
+  static const size_t LINES
+    = ((PAGE_SIZE - sizeof(gtm_cacheline_page *))
+       / (CACHELINE_SIZE + sizeof(gtm_cacheline_mask)));
+
+  gtm_cacheline lines[LINES] __attribute__((aligned(PAGE_SIZE)));
+  gtm_cacheline_mask masks[LINES];
+  gtm_cacheline_page *prev;
+
+  static gtm_cacheline_page *
+  page_for_line (gtm_cacheline *c)
+  {
+    return (gtm_cacheline_page *)((uintptr_t)c & -PAGE_SIZE);
+  }
+
+  gtm_cacheline_mask *
+  mask_for_line (gtm_cacheline *c)
+  {
+    size_t index = c - &this->lines[0];
+    return &this->masks[index];
+  }
+
+  static gtm_cacheline_mask *
+  mask_for_page_line (gtm_cacheline *c)
+  {
+    gtm_cacheline_page *p = page_for_line (c);
+    return p->mask_for_line (c);
+  }
+
+  static void *operator new (size_t);
+  static void operator delete (void *);
+};
+
+} // namespace GTM
+
+#endif // LIBITM_CACHEPAGE_H
Index: libitm/config/generic/tls.cc
===================================================================
--- libitm/config/generic/tls.cc	(.../trunk)	(revision 0)
+++ libitm/config/generic/tls.cc	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,76 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "libitm_i.h"
+
+namespace GTM HIDDEN {
+
+// Filter out any updates that overlap the libitm stack, as defined by
+// TOP (entry point to library) and BOT (below current function).  This
+// definition should be fine for all stack-grows-down architectures.
+
+gtm_cacheline_mask __attribute__((noinline))
+gtm_mask_stack(gtm_cacheline *line, gtm_cacheline_mask mask)
+{
+  void *top = gtm_thr()->jb.cfa;
+  void *bot = __builtin_dwarf_cfa();
+
+  // We must have come through an entry point that set TOP.
+  assert (top != NULL);
+
+  if (line + 1 < bot)
+    {
+      // Since we don't have the REAL stack boundaries for this thread,
+      // we cannot know if this is a dead write to a stack address below
+      // the current function or if it is write to another VMA.  In either
+      // case allowing the write should not affect correctness.
+    }
+  else if (line >= top)
+    {
+      // A valid write to an address in an outer stack frame, or a write
+      // to another VMA.
+    }
+  else
+    {
+      uintptr_t diff = (uintptr_t)top - (uintptr_t)line;
+      if (diff >= CACHELINE_SIZE)
+	{
+	  // The write is either fully within the proscribed area, or the tail
+	  // of the cacheline overlaps the proscribed area.  Assume that all
+	  // stacks are at least cacheline aligned and declare the head of the
+	  // cacheline dead.
+	  mask = 0;
+	}
+      else
+	{
+	  // The head of the cacheline is within the proscribed area, but the
+	  // tail of the cacheline is live.  Eliminate the dead writes.
+	  mask &= (gtm_cacheline_mask)-1 << diff;
+	}
+    }
+
+  return mask;
+}
+
+} // namespace GTM
Index: libitm/config/generic/tls.h
===================================================================
--- libitm/config/generic/tls.h	(.../trunk)	(revision 0)
+++ libitm/config/generic/tls.h	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,65 @@
+/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef LIBITM_TLS_H
+#define LIBITM_TLS_H 1
+
+namespace GTM HIDDEN {
+
+#if !defined(HAVE_ARCH_GTM_THREAD) || !defined(HAVE_ARCH_GTM_THREAD_DISP)
+// Provides a single place to store all this libraries thread-local data.
+struct gtm_thread_tls
+{
+#ifndef HAVE_ARCH_GTM_THREAD
+  // The currently active transaction.  Elided if the target provides
+  // some efficient mechanism for storing this.
+  gtm_thread *thr;
+#endif
+#ifndef HAVE_ARCH_GTM_THREAD_DISP
+  // The dispatch table for the STM implementation currently in use.  Elided
+  // if the target provides some efficient mechanism for storing this.
+  abi_dispatch *disp;
+#endif
+};
+
+extern __thread gtm_thread_tls _gtm_thr_tls;
+#endif
+
+#ifndef HAVE_ARCH_GTM_THREAD
+// If the target does not provide optimized access to the thread-local
+// data, simply access the TLS variable defined above.
+static inline gtm_thread *gtm_thr() { return _gtm_thr_tls.thr; }
+static inline void set_gtm_thr(gtm_thread *x) { _gtm_thr_tls.thr = x; }
+#endif
+
+#ifndef HAVE_ARCH_GTM_THREAD_DISP
+// If the target does not provide optimized access to the currently
+// active dispatch table, simply access via GTM_THR.
+static inline abi_dispatch * abi_disp() { return _gtm_thr_tls.disp; }
+static inline void set_abi_disp(abi_dispatch *x) { _gtm_thr_tls.disp = x; }
+#endif
+
+} // namespace GTM
+
+#endif // LIBITM_TLS_H
Index: libitm/config/generic/unaligned.h
===================================================================
--- libitm/config/generic/unaligned.h	(.../trunk)	(revision 0)
+++ libitm/config/generic/unaligned.h	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,228 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef LIBITM_UNALIGNED_H
+#define LIBITM_UNALIGNED_H 1
+
+namespace GTM HIDDEN {
+
+#ifndef STRICT_ALIGNMENT
+#define STRICT_ALIGNMENT 1
+#endif
+
+// A type trait for whether type T requires strict alignment.
+// The generic types are assumed to all be the same; specializations
+// for target-specific types should be done in config/cpu/unaligned.h.
+template<typename T>
+  struct strict_alignment
+    : public std::integral_constant<bool, STRICT_ALIGNMENT>
+  { };
+
+// A helper template for accessing an integral type the same size as T
+template<typename T>
+  struct make_integral
+    : public sized_integral<sizeof(T)>
+  { };
+
+// A helper class for accessing T as an unaligned value.
+template<typename T>
+struct __attribute__((packed)) unaligned_helper
+  { T x; };
+
+// A helper class for view-converting T as an integer.
+template<typename T>
+union view_convert_helper
+{
+  typedef T type;
+  typedef make_integral<T> itype;
+
+  type t;
+  itype i;
+};
+
+// Generate an unaligned load sequence.
+// The compiler knows how to do this for any specific type.
+template<typename T>
+inline T ALWAYS_INLINE
+unaligned_load(const void *t)
+{
+  typedef unaligned_helper<T> UT;
+  const UT *ut = reinterpret_cast<const UT *>(t);
+  return ut->x;
+}
+
+// Generate an unaligned store sequence.
+template<typename T>
+inline void ALWAYS_INLINE
+unaligned_store(void *t, T val)
+{
+  typedef unaligned_helper<T> UT;
+  UT *ut = reinterpret_cast<UT *>(t);
+  ut->x = val;
+}
+
+// Generate an unaligned load from two different cachelines.
+// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE.
+template<typename T>
+inline T ALWAYS_INLINE
+unaligned_load2(const gtm_cacheline *c1, const gtm_cacheline *c2, size_t ofs)
+{
+  size_t left = CACHELINE_SIZE - ofs;
+  T ret;
+
+  memcpy (&ret, &c1->b[ofs], left);
+  memcpy ((char *)&ret + left, c2, sizeof(T) - left);
+
+  return ret;
+}
+
+// Generate an unaligned store into two different cachelines.
+// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE.
+template<typename T>
+inline void ALWAYS_INLINE
+unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, size_t ofs, T val)
+{
+  size_t left = CACHELINE_SIZE - ofs;
+  memcpy (&c1->b[ofs], &val, left);
+  memcpy (c2, (char *)&val + left, sizeof(T) - left);
+}
+
+#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U2
+template<>
+inline uint16_t ALWAYS_INLINE
+unaligned_load2<uint16_t>(const gtm_cacheline *c1,
+			  const gtm_cacheline *c2, size_t ofs)
+{
+  uint16_t v1 = c1->b[CACHELINE_SIZE - 1];
+  uint16_t v2 = c2->b[0];
+
+  if (WORDS_BIGENDIAN)
+    return v1 << 8 | v2;
+  else
+    return v2 << 8 | v1;
+}
+#endif
+
+#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U4
+template<>
+inline uint32_t ALWAYS_INLINE
+unaligned_load2<uint32_t>(const gtm_cacheline *c1,
+			  const gtm_cacheline *c2, size_t ofs)
+{
+  uint32_t v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
+  uint32_t v2 = c2->u32[0];
+  int s2 = (ofs & (sizeof(uint32_t) - 1)) * 8;
+  int s1 = sizeof(uint32_t) * 8 - s2;
+
+  if (WORDS_BIGENDIAN)
+    return v1 << s2 | v2 >> s1;
+  else
+    return v2 << s2 | v1 >> s1;
+}
+#endif
+
+#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U8
+template<>
+inline uint64_t ALWAYS_INLINE
+unaligned_load2<uint64_t>(const gtm_cacheline *c1,
+			  const gtm_cacheline *c2, size_t ofs)
+{
+  uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
+  uint64_t v2 = c2->u64[0];
+  int s2 = (ofs & (sizeof(uint64_t) - 1)) * 8;
+  int s1 = sizeof(uint64_t) * 8 - s2;
+
+  if (WORDS_BIGENDIAN)
+    return v1 << s2 | v2 >> s1;
+  else
+    return v2 << s2 | v1 >> s1;
+}
+#endif
+
+template<>
+inline float ALWAYS_INLINE
+unaligned_load2<float>(const gtm_cacheline *c1,
+		       const gtm_cacheline *c2, size_t ofs)
+{
+  typedef view_convert_helper<float> VC; VC vc;
+  vc.i = unaligned_load2<VC::itype>(c1, c2, ofs);
+  return vc.t;
+}
+
+template<>
+inline double ALWAYS_INLINE
+unaligned_load2<double>(const gtm_cacheline *c1,
+			const gtm_cacheline *c2, size_t ofs)
+{
+  typedef view_convert_helper<double> VC; VC vc;
+  vc.i = unaligned_load2<VC::itype>(c1, c2, ofs);
+  return vc.t;
+}
+
+#ifndef HAVE_ARCH_UNALIGNED_STORE2_U2
+template<>
+inline void ALWAYS_INLINE
+unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2,
+			   size_t ofs, uint16_t val)
+{
+  uint8_t vl = val, vh = val >> 8;
+
+  if (WORDS_BIGENDIAN)
+    {
+      c1->b[CACHELINE_SIZE - 1] = vh;
+      c2->b[0] = vl;
+    }
+  else
+    {
+      c1->b[CACHELINE_SIZE - 1] = vl;
+      c2->b[0] = vh;
+    }
+}
+#endif
+
+#if 0
+#ifndef HAVE_ARCH_UNALIGNED_STORE2_U4
+template<>
+inline void ALWAYS_INLINE
+unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2,
+			   size_t ofs, uint32_t val)
+{
+  // ??? We could reuse the store_mask stuff here.
+}
+#endif
+
+template<>
+inline void ALWAYS_INLINE
+unaligned_store2<float>(gtm_cacheline *c1, gtm_cacheline *c2,
+			size_t ofs, float val)
+{
+  typedef view_convert_helper<float> VC; VC vc;
+  vc.t = val;
+  unaligned_store2(c1, c2, ofs, vc.i);
+}
+#endif
+
+} // namespace GTM
+
+#endif // LIBITM_UNALIGNED_H
Index: libitm/acinclude.m4
===================================================================
--- libitm/acinclude.m4	(.../trunk)	(revision 0)
+++ libitm/acinclude.m4	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,343 @@
+dnl ----------------------------------------------------------------------
+dnl This whole bit snagged from libgfortran.
+
+dnl Check whether the target supports __sync_*_compare_and_swap.
+AC_DEFUN([LIBITM_CHECK_SYNC_BUILTINS], [
+  AC_CACHE_CHECK([whether the target supports __sync_*_compare_and_swap],
+		 libitm_cv_have_sync_builtins, [
+  AC_TRY_LINK([], [int foo, bar; bar = __sync_val_compare_and_swap(&foo, 0, 1);],
+	      libitm_cv_have_sync_builtins=yes, libitm_cv_have_sync_builtins=no)])
+  if test $libitm_cv_have_sync_builtins = yes; then
+    AC_DEFINE(HAVE_SYNC_BUILTINS, 1,
+	      [Define to 1 if the target supports __sync_*_compare_and_swap])
+  fi])
+
+dnl Check whether the target supports 64-bit __sync_*_compare_and_swap.
+AC_DEFUN([LIBITM_CHECK_64BIT_SYNC_BUILTINS], [
+  AC_CACHE_CHECK([whether the target supports 64-bit __sync_*_compare_and_swap],
+		 libitm_cv_have_64bit_sync_builtins, [
+  AC_TRY_LINK([#include <stdint.h>],
+    [uint64_t foo, bar;
+     bar = __sync_val_compare_and_swap(&foo, 0, 1);],
+    libitm_cv_have_64bit_sync_builtins=yes,
+    libitm_cv_have_64bit_sync_builtins=no)])
+    if test $libitm_cv_have_64bit_sync_builtins = yes; then
+      AC_DEFINE(HAVE_64BIT_SYNC_BUILTINS, 1,
+	        [Define to 1 if the target supports 64-bit __sync_*_compare_and_swap])
+  fi])
+
+dnl Check whether the target supports hidden visibility.
+AC_DEFUN([LIBITM_CHECK_ATTRIBUTE_VISIBILITY], [
+  AC_CACHE_CHECK([whether the target supports hidden visibility],
+		 libitm_cv_have_attribute_visibility, [
+  save_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -Werror"
+  AC_TRY_COMPILE([void __attribute__((visibility("hidden"))) foo(void) { }],
+		 [], libitm_cv_have_attribute_visibility=yes,
+		 libitm_cv_have_attribute_visibility=no)
+  CFLAGS="$save_CFLAGS"])
+  if test $libitm_cv_have_attribute_visibility = yes; then
+    AC_DEFINE(HAVE_ATTRIBUTE_VISIBILITY, 1,
+      [Define to 1 if the target supports __attribute__((visibility(...))).])
+  fi])
+
+dnl Check whether the target supports dllexport
+AC_DEFUN([LIBITM_CHECK_ATTRIBUTE_DLLEXPORT], [
+  AC_CACHE_CHECK([whether the target supports dllexport],
+		 libitm_cv_have_attribute_dllexport, [
+  save_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -Werror"
+  AC_TRY_COMPILE([void __attribute__((dllexport)) foo(void) { }],
+		 [], libitm_cv_have_attribute_dllexport=yes,
+		 libitm_cv_have_attribute_dllexport=no)
+  CFLAGS="$save_CFLAGS"])
+  if test $libitm_cv_have_attribute_dllexport = yes; then
+    AC_DEFINE(HAVE_ATTRIBUTE_DLLEXPORT, 1,
+      [Define to 1 if the target supports __attribute__((dllexport)).])
+  fi])
+
+dnl Check whether the target supports symbol aliases.
+AC_DEFUN([LIBITM_CHECK_ATTRIBUTE_ALIAS], [
+  AC_CACHE_CHECK([whether the target supports symbol aliases],
+		 libitm_cv_have_attribute_alias, [
+  AC_TRY_LINK([
+void foo(void) { }
+extern void bar(void) __attribute__((alias("foo")));],
+    [bar();], libitm_cv_have_attribute_alias=yes, libitm_cv_have_attribute_alias=no)])
+  if test $libitm_cv_have_attribute_alias = yes; then
+    AC_DEFINE(HAVE_ATTRIBUTE_ALIAS, 1,
+      [Define to 1 if the target supports __attribute__((alias(...))).])
+  fi])
+
+dnl Check how size_t is mangled.
+AC_DEFUN([LIBITM_CHECK_SIZE_T_MANGLING], [
+  AC_CACHE_CHECK([how size_t is mangled],
+                 libitm_cv_size_t_mangling, [
+    AC_TRY_COMPILE([], [extern __SIZE_TYPE__ x; extern unsigned long x;],
+	           [libitm_cv_size_t_mangling=m], [
+      AC_TRY_COMPILE([], [extern __SIZE_TYPE__ x; extern unsigned int x;],
+	             [libitm_cv_size_t_mangling=j], [
+        AC_TRY_COMPILE([],
+		       [extern __SIZE_TYPE__ x; extern unsigned long long x;],
+	               [libitm_cv_size_t_mangling=y], [
+          AC_TRY_COMPILE([],
+			 [extern __SIZE_TYPE__ x; extern unsigned short x;],
+			 [libitm_cv_size_t_mangling=t],
+		         [libitm_cv_size_t_mangling=x])
+	])
+      ])
+    ])
+  ])
+  if test $libitm_cv_size_t_mangling = x; then
+    AC_MSG_ERROR([Unknown underlying type for size_t])
+  fi
+  AC_DEFINE_UNQUOTED(MANGLE_SIZE_T, [$libitm_cv_size_t_mangling],
+    [Define to the letter to which size_t is mangled.])
+])
+
+sinclude(../libtool.m4)
+dnl The lines below arrange for aclocal not to bring an installed
+dnl libtool.m4 into aclocal.m4, while still arranging for automake to
+dnl add a definition of LIBTOOL to Makefile.in.
+ifelse(,,,[AC_SUBST(LIBTOOL)
+AC_DEFUN([AM_PROG_LIBTOOL])
+AC_DEFUN([AC_LIBTOOL_DLOPEN])
+AC_DEFUN([AC_PROG_LD])
+])
+
+dnl ----------------------------------------------------------------------
+dnl This whole bit snagged from libstdc++-v3.
+
+dnl
+dnl LIBITM_ENABLE
+dnl    (FEATURE, DEFAULT, HELP-ARG, HELP-STRING)
+dnl    (FEATURE, DEFAULT, HELP-ARG, HELP-STRING, permit a|b|c)
+dnl    (FEATURE, DEFAULT, HELP-ARG, HELP-STRING, SHELL-CODE-HANDLER)
+dnl
+dnl See docs/html/17_intro/configury.html#enable for documentation.
+dnl
+m4_define([LIBITM_ENABLE],[dnl
+m4_define([_g_switch],[--enable-$1])dnl
+m4_define([_g_help],[AC_HELP_STRING(_g_switch$3,[$4 @<:@default=$2@:>@])])dnl
+ AC_ARG_ENABLE($1,_g_help,
+  m4_bmatch([$5],
+   [^permit ],
+     [[
+      case "$enableval" in
+       m4_bpatsubst([$5],[permit ])) ;;
+       *) AC_MSG_ERROR(Unknown argument to enable/disable $1) ;;
+          dnl Idea for future:  generate a URL pointing to
+          dnl "onlinedocs/configopts.html#whatever"
+      esac
+     ]],
+   [^$],
+     [[
+      case "$enableval" in
+       yes|no) ;;
+       *) AC_MSG_ERROR(Argument to enable/disable $1 must be yes or no) ;;
+      esac
+     ]],
+   [[$5]]),
+  [enable_]m4_bpatsubst([$1],-,_)[=][$2])
+m4_undefine([_g_switch])dnl
+m4_undefine([_g_help])dnl
+])
+
+
+dnl
+dnl If GNU ld is in use, check to see if tricky linker opts can be used.  If
+dnl the native linker is in use, all variables will be defined to something
+dnl safe (like an empty string).
+dnl
+dnl Defines:
+dnl  SECTION_LDFLAGS='-Wl,--gc-sections' if possible
+dnl  OPT_LDFLAGS='-Wl,-O1' if possible
+dnl  LD (as a side effect of testing)
+dnl Sets:
+dnl  with_gnu_ld
+dnl  libitm_gnu_ld_version (possibly)
+dnl
+dnl The last will be a single integer, e.g., version 1.23.45.0.67.89 will
+dnl set libitm_gnu_ld_version to 12345.  Zeros cause problems.
+dnl
+AC_DEFUN([LIBITM_CHECK_LINKER_FEATURES], [
+  # If we're not using GNU ld, then there's no point in even trying these
+  # tests.  Check for that first.  We should have already tested for gld
+  # by now (in libtool), but require it now just to be safe...
+  test -z "$SECTION_LDFLAGS" && SECTION_LDFLAGS=''
+  test -z "$OPT_LDFLAGS" && OPT_LDFLAGS=''
+  AC_REQUIRE([AC_PROG_LD])
+  AC_REQUIRE([AC_PROG_AWK])
+
+  # The name set by libtool depends on the version of libtool.  Shame on us
+  # for depending on an impl detail, but c'est la vie.  Older versions used
+  # ac_cv_prog_gnu_ld, but now it's lt_cv_prog_gnu_ld, and is copied 
+back on
+  # top of with_gnu_ld (which is also set by --with-gnu-ld, so that 
+actually
+  # makes sense).  We'll test with_gnu_ld everywhere else, so if that isn't
+  # set (hence we're using an older libtool), then set it.
+  if test x${with_gnu_ld+set} != xset; then
+    if test x${ac_cv_prog_gnu_ld+set} != xset; then
+      # We got through "ac_require(ac_prog_ld)" and still not set?  Huh?
+      with_gnu_ld=no
+    else
+      with_gnu_ld=$ac_cv_prog_gnu_ld
+    fi
+  fi
+
+  # Start by getting the version number.  I think the libtool test already
+  # does some of this, but throws away the result.
+  # The awk program folds "A.B.C" into the integer (A*100+B)*100+C.
+  changequote(,)
+  ldver=`$LD --version 2>/dev/null | head -1 | \
+         sed -e 's/GNU ld \(version \)\{0,1\}\(([^)]*) 
+\)\{0,1\}\([0-9.][0-9.]*\).*/\3/'`
+  changequote([,])
+  libitm_gnu_ld_version=`echo $ldver | \
+         $AWK -F. '{ if (NF<3) [$]3=0; print ([$]1*100+[$]2)*100+[$]3 }'`
+
+  # Set --gc-sections.
+  # NOTE(review): the guard below compares against "notbroken", so the
+  # --gc-sections probe can never run.  Per the XXX comment inside, this
+  # looks like a deliberate disable of a known-broken test -- confirm
+  # before "fixing" the comparison to "yes".
+  if test "$with_gnu_ld" = "notbroken"; then
+    # GNU ld it is!  Joy and bunny rabbits!
+
+    # All these tests are for C++; save the language and the compiler 
+flags.
+    # Need to do this so that g++ won't try to link in libstdc++
+    ac_test_CFLAGS="${CFLAGS+set}"
+    ac_save_CFLAGS="$CFLAGS"
+    CFLAGS='-x c++  -Wl,--gc-sections'
+
+    # Check for -Wl,--gc-sections
+    # XXX This test is broken at the moment, as symbols required for 
+linking
+    # are now in libsupc++ (not built yet).  In addition, this test has
+    # cored on solaris in the past.  In addition, --gc-sections doesn't
+    # really work at the moment (keeps on discarding used sections, first
+    # .eh_frame and now some of the glibc sections for iconv).
+    # Bzzzzt.  Thanks for playing, maybe next time.
+    AC_MSG_CHECKING([for ld that supports -Wl,--gc-sections])
+    AC_TRY_RUN([
+     int main(void)
+     {
+       try { throw 1; }
+       catch (...) { };
+       return 0;
+     }
+    ], [ac_sectionLDflags=yes],[ac_sectionLDflags=no], 
+[ac_sectionLDflags=yes])
+    if test "$ac_test_CFLAGS" = set; then
+      CFLAGS="$ac_save_CFLAGS"
+    else
+      # this is the suspicious part
+      CFLAGS=''
+    fi
+    if test "$ac_sectionLDflags" = "yes"; then
+      SECTION_LDFLAGS="-Wl,--gc-sections $SECTION_LDFLAGS"
+    fi
+    AC_MSG_RESULT($ac_sectionLDflags)
+  fi
+
+  # Set linker optimization flags.
+  if test x"$with_gnu_ld" = x"yes"; then
+    OPT_LDFLAGS="-Wl,-O1 $OPT_LDFLAGS"
+  fi
+
+  AC_SUBST(SECTION_LDFLAGS)
+  AC_SUBST(OPT_LDFLAGS)
+])
+
+
+dnl
+dnl Add version tags to symbols in shared library (or not), additionally
+dnl marking other symbols as private/local (or not).
+dnl
+dnl --enable-symvers=style adds a version script to the linker call when
+dnl       creating the shared library.  The choice of version script is
+dnl       controlled by 'style'.
+dnl --disable-symvers does not.
+dnl  +  Usage:  LIBITM_ENABLE_SYMVERS[(DEFAULT)]
+dnl       Where DEFAULT is either 'yes' or 'no'.  Passing `yes' tries to
+dnl       choose a default style based on linker characteristics.  Passing
+dnl       'no' disables versioning.
+dnl
+AC_DEFUN([LIBITM_ENABLE_SYMVERS], [
+
+LIBITM_ENABLE(symvers,yes,[=STYLE],
+  [enables symbol versioning of the shared library],
+  [permit yes|no|gnu])
+
+# If we never went through the LIBITM_CHECK_LINKER_FEATURES macro, then we
+# don't know enough about $LD to do tricks...
+AC_REQUIRE([LIBITM_CHECK_LINKER_FEATURES])
+# FIXME  The following test is too strict, in theory.
+if test $enable_shared = no ||
+        test "x$LD" = x ||
+        test x$libitm_gnu_ld_version = x; then
+  enable_symvers=no
+fi
+
+# Check to see if libgcc_s exists, indicating that shared libgcc is 
+possible.
+if test $enable_symvers != no; then
+  AC_MSG_CHECKING([for shared libgcc])
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS=' -lgcc_s'
+  AC_TRY_LINK(, [return 0;], libitm_shared_libgcc=yes, 
+libitm_shared_libgcc=no)
+  CFLAGS="$ac_save_CFLAGS"
+  if test $libitm_shared_libgcc = no; then
+    # Plain -lgcc_s did not link; scrape the verbose link output for a
+    # suffixed -lgcc_s variant and retry with that.
+    cat > conftest.c <<EOF
+int main (void) { return 0; }
+EOF
+changequote(,)dnl
+    libitm_libgcc_s_suffix=`${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
+			     -shared -shared-libgcc -o conftest.so \
+			     conftest.c -v 2>&1 >/dev/null \
+			     | sed -n 's/^.* -lgcc_s\([^ ]*\) .*$/\1/p'`
+changequote([,])dnl
+    rm -f conftest.c conftest.so
+    if test x${libitm_libgcc_s_suffix+set} = xset; then
+      CFLAGS=" -lgcc_s$libitm_libgcc_s_suffix"
+      AC_TRY_LINK(, [return 0;], libitm_shared_libgcc=yes)
+      CFLAGS="$ac_save_CFLAGS"
+    fi
+  fi
+  AC_MSG_RESULT($libitm_shared_libgcc)
+fi
+
+# For GNU ld, we need at least this version.  The format is described in
+# LIBITM_CHECK_LINKER_FEATURES above.
+libitm_min_gnu_ld_version=21400
+# XXXXXXXXXXX libitm_gnu_ld_version=21390
+
+# Check to see if unspecified "yes" value can win, given results above.
+# Change "yes" into either "no" or a style name.
+if test $enable_symvers = yes; then
+  if test $with_gnu_ld = yes &&
+     test $libitm_shared_libgcc = yes;
+  then
+    if test $libitm_gnu_ld_version -ge $libitm_min_gnu_ld_version ; then
+      enable_symvers=gnu
+    else
+      # The right tools, the right setup, but too old.  Fallbacks?
+      AC_MSG_WARN(=== Linker version $libitm_gnu_ld_version is too old for)
+      AC_MSG_WARN(=== full symbol versioning support in this release of 
+GCC.)
+      AC_MSG_WARN(=== You would need to upgrade your binutils to version)
+      AC_MSG_WARN(=== $libitm_min_gnu_ld_version or later and rebuild GCC.)
+      if test $libitm_gnu_ld_version -ge 21200 ; then
+        # Globbing fix is present, proper block support is not.
+        dnl AC_MSG_WARN([=== Dude, you are soooo close.  Maybe we can 
+fake it.])
+        dnl enable_symvers=???
+        AC_MSG_WARN([=== Symbol versioning will be disabled.])
+        enable_symvers=no
+      else
+        # 2.11 or older.
+        AC_MSG_WARN([=== Symbol versioning will be disabled.])
+        enable_symvers=no
+      fi
+    fi
+  else
+    # just fail for now
+    AC_MSG_WARN([=== You have requested some kind of symbol versioning, 
+but])
+    AC_MSG_WARN([=== either you are not using a supported linker, or 
+you are])
+    AC_MSG_WARN([=== not building a shared libgcc_s (which is required).])
+    AC_MSG_WARN([=== Symbol versioning will be disabled.])
+    enable_symvers=no
+  fi
+fi
+
+AM_CONDITIONAL(LIBITM_BUILD_VERSIONED_SHLIB, test $enable_symvers != no)
+AC_MSG_NOTICE(versioning on shared library symbols is $enable_symvers)
+])
+
+
+#endif // COMMON_H
Joseph S. Myers - Nov. 3, 2011, 8:15 p.m.
On Thu, 3 Nov 2011, Aldy Hernandez wrote:

> Index: libitm/acinclude.m4
> ===================================================================
> --- libitm/acinclude.m4	(.../trunk)	(revision 0)
> +++ libitm/acinclude.m4	(.../branches/transactional-memory)	(revision
> 180773)
> @@ -0,0 +1,343 @@
> +dnl ----------------------------------------------------------------------
> +dnl This whole bit snagged from libgfortran.

If you need a configure test in more than one library, do not copy it like 
this; put a common macro in config/ and use it from both libraries.

> +dnl ----------------------------------------------------------------------
> +dnl This whole bit snagged from libstdc++-v3.

Likewise.  There may well be some bits that for whatever reason need to be 
similar but different, or that are specific to libitm, but if something 
can be shared then it should be shared.

> Index: libitm/Makefile.am
> ===================================================================
> --- libitm/Makefile.am	(.../trunk)	(revision 0)
> +++ libitm/Makefile.am	(.../branches/transactional-memory)	(revision
> 180773)

Do you need a FLAGS_TO_PASS setting as in 
<http://gcc.gnu.org/ml/gcc-patches/2011-09/msg01187.html>?  (The way to 
test is to do a multilib build and install, passing infodir=/some/where on 
the "make install" line, and see if the manual ends up installed in the 
configured directory under $prefix as well or instead of the directory 
passed on the "make install" line - it should only go in the directory 
passed to "make install".)
Torvald Riegel - Nov. 6, 2011, 3:44 p.m.
On Thu, 2011-11-03 at 20:15 +0000, Joseph S. Myers wrote:
> Do you need a FLAGS_TO_PASS setting as in 
> <http://gcc.gnu.org/ml/gcc-patches/2011-09/msg01187.html>?  (The way to 
> test is to do a multilib build and install, passing infodir=/some/where on 
> the "make install" line, and see if the manual ends up installed in the 
> configured directory under $prefix as well or instead of the directory 
> passed on the "make install" line - it should only go in the directory 
> passed to "make install".)

I can't reproduce this in a multilib config. The libitm info file is
correctly installed to the infodir= location only, so I'd assume this
works fine as is. But thanks for the note.

Torvald

Patch

Index: libitm/Makefile.am
===================================================================
--- libitm/Makefile.am	(.../trunk)	(revision 0)
+++ libitm/Makefile.am	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,98 @@ 
+## Process this file with automake to produce Makefile.in
+
+ACLOCAL_AMFLAGS = -I .. -I ../config
+SUBDIRS = testsuite
+
+## May be used by toolexeclibdir.
+gcc_version := $(shell cat $(top_srcdir)/../gcc/BASE-VER)
+
+abi_version = -fabi-version=4
+
+## config_path is the ordered list of target-specific config subdirs
+## chosen by configure; search_path turns it into absolute directories.
+## NOTE(review): the next assignment looks line-wrapped by the mail
+## archive; in the real file $(top_srcdir) continues the same line.
+config_path = @config_path@
+search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) 
+$(top_srcdir)
+
+fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/finclude
+libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
+
+## Let make locate sources through the same per-target search path.
+vpath % $(strip $(search_path))
+
+AM_CPPFLAGS = $(addprefix -I, $(search_path))
+AM_CFLAGS = $(XCFLAGS)
+AM_CXXFLAGS = -std=gnu++0x -funwind-tables -fno-exceptions -fno-rtti \
+	$(XCFLAGS) $(abi_version)
+AM_CCASFLAGS = $(XCFLAGS)
+AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS)
+
+toolexeclib_LTLIBRARIES = libitm.la
+nodist_toolexeclib_HEADERS = libitm.spec
+
+if LIBITM_BUILD_VERSIONED_SHLIB
+libitm_version_script = -Wl,--version-script,$(top_srcdir)/libitm.map
+else
+libitm_version_script =
+endif
+libitm_version_info = -version-info $(libtool_VERSION)
+
+# Force link with C, not C++.  For now, while we're using C++ we don't
+# want or need libstdc++.
+libitm_la_LINK = $(LINK)
+libitm_la_LDFLAGS = $(libitm_version_info) $(libitm_version_script) \
+        -no-undefined
+
+libitm_la_SOURCES = \
+	aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc barrier.cc beginend.cc \
+	clone.cc cacheline.cc cachepage.cc eh_cpp.cc local.cc \
+	query.cc retry.cc rwlock.cc useraction.cc util.cc \
+	sjlj.S tls.cc method-serial.cc method-gl.cc
+
+## SSE/AVX sources need their ISA enabled just for those objects.
+if ARCH_X86
+libitm_la_SOURCES += x86_sse.cc x86_avx.cc
+x86_sse.lo : XCFLAGS += -msse
+x86_avx.lo : XCFLAGS += -mavx
+endif
+
+if ARCH_FUTEX
+libitm_la_SOURCES += futex.cc
+endif
+
+# Automake Documentation:
+# If your package has Texinfo files in many directories, you can use the
+# variable TEXINFO_TEX to tell Automake where to find the canonical
+# `texinfo.tex' for your package. The value of this variable should be
+# the relative path from the current `Makefile.am' to `texinfo.tex'.
+TEXINFO_TEX   = ../gcc/doc/include/texinfo.tex
+
+# Defines info, dvi, pdf and html targets
+MAKEINFOFLAGS = -I $(srcdir)/../gcc/doc/include
+info_TEXINFOS = libitm.texi
+
+# AM_CONDITIONAL on configure option --generated-files-in-srcdir
+if GENINSRC
+STAMP_GENINSRC = stamp-geninsrc
+else
+STAMP_GENINSRC =
+endif
+
+# AM_CONDITIONAL on configure check ACX_CHECK_PROG_VER([MAKEINFO])
+if BUILD_INFO
+STAMP_BUILD_INFO = stamp-build-info
+else
+STAMP_BUILD_INFO =
+endif
+
+
+all-local: $(STAMP_GENINSRC)
+
+stamp-geninsrc: libitm.info
+	cp -p $(top_builddir)/libitm.info $(srcdir)/libitm.info
+	@touch $@
+
+libitm.info: $(STAMP_BUILD_INFO)
+
+## NOTE(review): the $(MAKEINFO) command below also looks line-wrapped by
+## the mail archive; "libitm.info" continues the same command line.
+stamp-build-info: libitm.texi
+	$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o 
+libitm.info $(srcdir)/libitm.texi
+	@touch $@
+
+
+CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO) libitm.info
+MAINTAINERCLEANFILES = $(srcdir)/libitm.info
Index: libitm/memcpy.cc
===================================================================
--- libitm/memcpy.cc	(.../trunk)	(revision 0)
+++ libitm/memcpy.cc	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,365 @@ 
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but 
WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or 
FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "libitm_i.h"
+
+using namespace GTM;
+
+// Transactionally copy SIZE bytes from ISRC to IDST, cacheline by
+// cacheline.  Each destination line is acquired through the current
+// dispatch's write_lock (mode W) and each source line through read_lock
+// (mode R); the mask returned with the destination line is updated so
+// that only the bytes actually written are flagged as modified.
+static void
+do_memcpy (uintptr_t idst, uintptr_t isrc, size_t size,
+	   abi_dispatch::lock_type W, abi_dispatch::lock_type R)
+{
+  abi_dispatch *disp = abi_disp();
+  // The position in the destination cacheline where *IDST starts.
+  uintptr_t dofs = idst & (CACHELINE_SIZE - 1);
+  // The position in the source cacheline where *ISRC starts.
+  uintptr_t sofs = isrc & (CACHELINE_SIZE - 1);
+  const gtm_cacheline *src
+    = reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
+  gtm_cacheline *dst
+    = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
+  const gtm_cacheline *sline;
+  abi_dispatch::mask_pair dpair;
+
+  if (size == 0)
+    return;
+
+  // If both SRC and DST data start at the same position in the cachelines,
+  // we can easily copy the data in tandem, cacheline by cacheline...
+  if (dofs == sofs)
+    {
+      // We copy the data in three stages:
+
+      // (a) Copy stray bytes at the beginning that are smaller than a
+      // cacheline.
+      if (sofs != 0)
+	{
+	  size_t sleft = CACHELINE_SIZE - sofs;
+	  size_t min = (size <= sleft ? size : sleft);
+
+	  dpair = disp->write_lock(dst, W);
+	  sline = disp->read_lock(src, R);
+	  // Flag the MIN bytes starting at offset SOFS as written.
+	  *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << sofs;
+	  memcpy (&dpair.line->b[sofs], &sline->b[sofs], min);
+	  dst++;
+	  src++;
+	  size -= min;
+	}
+
+      // (b) Copy subsequent cacheline sized chunks.
+      while (size >= CACHELINE_SIZE)
+	{
+	  dpair = disp->write_lock(dst, W);
+	  sline = disp->read_lock(src, R);
+	  // Whole line written: every mask bit set.
+	  *dpair.mask = -1;
+	  *dpair.line = *sline;
+	  dst++;
+	  src++;
+	  size -= CACHELINE_SIZE;
+	}
+
+      // (c) Copy anything left over.
+      if (size != 0)
+	{
+	  dpair = disp->write_lock(dst, W);
+	  sline = disp->read_lock(src, R);
+	  // Flag the first SIZE bytes of the line as written.
+	  *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
+	  memcpy (dpair.line, sline, size);
+	}
+    }
+  // ... otherwise, we must copy the data in disparate hunks using
+  // temporary storage.
+  else
+    {
+      gtm_cacheline c;
+      size_t sleft = CACHELINE_SIZE - sofs;
+
+      sline = disp->read_lock(src, R);
+
+      // As above, we copy the data in three stages:
+
+      // (a) Copy stray bytes at the beginning that are smaller than a
+      // cacheline.
+      if (dofs != 0)
+	{
+	  size_t dleft = CACHELINE_SIZE - dofs;
+	  size_t min = (size <= dleft ? size : dleft);
+
+	  dpair = disp->write_lock(dst, W);
+	  *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
+
+	  // If what's left in the source cacheline will fit in the
+	  // rest of the destination cacheline, straight up copy it.
+	  if (min <= sleft)
+	    {
+	      memcpy (&dpair.line->b[dofs], &sline->b[sofs], min);
+	      sofs += min;
+	    }
+	  // Otherwise, we need more bits from the source cacheline
+	  // that are available.  Piece together what we need from
+	  // contiguous (source) cachelines, into temp space, and copy
+	  // it over.
+	  else
+	    {
+	      memcpy (&c, &sline->b[sofs], sleft);
+	      sline = disp->read_lock(++src, R);
+	      sofs = min - sleft;
+	      memcpy (&c.b[sleft], sline, sofs);
+	      memcpy (&dpair.line->b[dofs], &c, min);
+	    }
+	  sleft = CACHELINE_SIZE - sofs;
+
+	  dst++;
+	  size -= min;
+	}
+
+      // (b) Copy subsequent cacheline sized chunks.
+      while (size >= CACHELINE_SIZE)
+	{
+	  // We have a full (destination) cacheline where to put the
+	  // data, but to get to the corresponding cacheline sized
+	  // chunk in the source, we have to piece together two
+	  // contiguous source cachelines.
+
+	  memcpy (&c, &sline->b[sofs], sleft);
+	  sline = disp->read_lock(++src, R);
+	  memcpy (&c.b[sleft], sline, sofs);
+
+	  dpair = disp->write_lock(dst, W);
+	  *dpair.mask = -1;
+	  *dpair.line = c;
+
+	  dst++;
+	  size -= CACHELINE_SIZE;
+	}
+
+      // (c) Copy anything left over.
+      if (size != 0)
+	{
+	  dpair = disp->write_lock(dst, W);
+	  *dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
+	  // If what's left to copy is entirely in the remaining
+	  // source cacheline, do it.
+	  if (size <= sleft)
+	    memcpy (dpair.line, &sline->b[sofs], size);
+	  // Otherwise, piece together the remaining bits, and copy.
+	  else
+	    {
+	      memcpy (&c, &sline->b[sofs], sleft);
+	      sline = disp->read_lock(++src, R);
+	      memcpy (&c.b[sleft], sline, size - sleft);
+	      memcpy (dpair.line, &c, size);
+	    }
+	}
+    }
+}
+
+// Transactional memmove worker: like do_memcpy but safe for overlapping
+// regions.  Non-overlapping (or forward-safe) cases are delegated to
+// do_memcpy; a true overlap with IDST > ISRC is copied backwards from
+// the end of the blocks.  DST == SRC is handled specially so the
+// advertised locks are still acquired even though no bytes move.
+static void
+do_memmove (uintptr_t idst, uintptr_t isrc, size_t size,
+	    abi_dispatch::lock_type W, abi_dispatch::lock_type R)
+{
+  abi_dispatch *disp = abi_disp();
+  uintptr_t dleft, sleft, sofs, dofs;
+  const gtm_cacheline *sline;
+  abi_dispatch::mask_pair dpair;
+
+  if (size == 0)
+    return;
+
+  /* The co-aligned memmove below doesn't work for DST == SRC, so filter
+     that out.  It's tempting to just return here, as this is a no-op move.
+     However, our caller has the right to expect the locks to be acquired
+     as advertised.  */
+  if (__builtin_expect (idst == isrc, 0))
+    {
+      /* If the write lock is already acquired, nothing to do.  */
+      if (W == abi_dispatch::WaW)
+	return;
+      /* If the destination is protected, acquire a write lock.  */
+      if (W != abi_dispatch::NOLOCK)
+	R = abi_dispatch::RfW;
+      /* Notice serial mode, where we don't acquire locks at all.  */
+      if (R == abi_dispatch::NOLOCK)
+	return;
+
+      /* Touch every affected cacheline once with the chosen lock.  */
+      idst = isrc + size;
+      for (isrc &= -CACHELINE_SIZE; isrc < idst; isrc += CACHELINE_SIZE)
+	disp->read_lock(reinterpret_cast<const gtm_cacheline *>(isrc), R);
+      return;
+    }
+
+  /* Fall back to memcpy if the implementation above can handle it.  */
+  if (idst < isrc || isrc + size <= idst)
+    {
+      do_memcpy (idst, isrc, size, W, R);
+      return;
+    }
+
+  /* What remains requires a backward copy from the end of the blocks.  */
+  idst += size;
+  isrc += size;
+  dofs = idst & (CACHELINE_SIZE - 1);
+  sofs = isrc & (CACHELINE_SIZE - 1);
+  dleft = CACHELINE_SIZE - dofs;
+  sleft = CACHELINE_SIZE - sofs;
+
+  gtm_cacheline *dst
+    = reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
+  const gtm_cacheline *src
+    = reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
+  // An exactly line-aligned end means the last touched line is the one
+  // before the rounded address.
+  if (dofs == 0)
+    dst--;
+  if (sofs == 0)
+    src--;
+
+  if (dofs == sofs)
+    {
+      /* Since DST and SRC are co-aligned, and we didn't use the memcpy
+	 optimization above, that implies that SIZE > CACHELINE_SIZE.  */
+      if (sofs != 0)
+	{
+	  dpair = disp->write_lock(dst, W);
+	  sline = disp->read_lock(src, R);
+	  *dpair.mask |= ((gtm_cacheline_mask)1 << sleft) - 1;
+	  memcpy (dpair.line, sline, sleft);
+	  dst--;
+	  src--;
+	  size -= sleft;
+	}
+
+      while (size >= CACHELINE_SIZE)
+	{
+	  dpair = disp->write_lock(dst, W);
+	  sline = disp->read_lock(src, R);
+	  *dpair.mask = -1;
+	  *dpair.line = *sline;
+	  dst--;
+	  src--;
+	  size -= CACHELINE_SIZE;
+	}
+
+      if (size != 0)
+	{
+	  size_t ofs = CACHELINE_SIZE - size;
+	  dpair = disp->write_lock(dst, W);
+	  sline = disp->read_lock(src, R);
+	  *dpair.mask |= (((gtm_cacheline_mask)1 << size) - 1) << ofs;
+	  memcpy (&dpair.line->b[ofs], &sline->b[ofs], size);
+	}
+    }
+  else
+    {
+      // Mis-aligned case: assemble each destination line from two
+      // adjacent source lines via the temporary C, walking backwards.
+      gtm_cacheline c;
+
+      sline = disp->read_lock(src, R);
+      if (dofs != 0)
+	{
+	  size_t min = (size <= dofs ? size : dofs);
+
+	  if (min <= sofs)
+	    {
+	      sofs -= min;
+	      memcpy (&c, &sline->b[sofs], min);
+	    }
+	  else
+	    {
+	      size_t min_ofs = min - sofs;
+	      memcpy (&c.b[min_ofs], sline, sofs);
+	      sline = disp->read_lock(--src, R);
+	      sofs = CACHELINE_SIZE - min_ofs;
+	      memcpy (&c, &sline->b[sofs], min_ofs);
+	    }
+
+	  dofs = dleft - min;
+	  dpair = disp->write_lock(dst, W);
+	  *dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
+	  memcpy (&dpair.line->b[dofs], &c, min);
+
+	  sleft = CACHELINE_SIZE - sofs;
+	  dst--;
+	  size -= min;
+	}
+
+      while (size >= CACHELINE_SIZE)
+	{
+	  memcpy (&c.b[sleft], sline, sofs);
+	  sline = disp->read_lock(--src, R);
+	  memcpy (&c, &sline->b[sofs], sleft);
+
+	  dpair = disp->write_lock(dst, W);
+	  *dpair.mask = -1;
+	  *dpair.line = c;
+
+	  dst--;
+	  size -= CACHELINE_SIZE;
+	}
+
+      if (size != 0)
+	{
+	  dofs = CACHELINE_SIZE - size;
+
+	  memcpy (&c.b[sleft], sline, sofs);
+	  if (sleft > dofs)
+	    {
+	      sline = disp->read_lock(--src, R);
+	      memcpy (&c, &sline->b[sofs], sleft);
+	    }
+
+	  dpair = disp->write_lock(dst, W);
+	  *dpair.mask |= (gtm_cacheline_mask)-1 << dofs;
+	  memcpy (&dpair.line->b[dofs], &c.b[dofs], size);
+	}
+    }
+}
+
+// Generate the ABI entry points _ITM_memcpy<NAME> and _ITM_memmove<NAME>.
+// Judging by the instantiations below, the NAME suffix encodes the lock
+// modes: R*/W* pick the source read lock and destination write lock,
+// with "n" meaning NOLOCK and "t"/"taR"/"taW" the transactional variants.
+// NOTE(review): the two function-header lines of this macro were wrapped
+// by the mail archive, which put the trailing '\' on a line of its own
+// and broke the macro; they are rejoined here.
+#define ITM_MEM_DEF(NAME, READ, WRITE) \
+void ITM_REGPARM _ITM_memcpy##NAME(void *dst, const void *src, size_t size)  \
+{									     \
+  do_memcpy ((uintptr_t)dst, (uintptr_t)src, size,			     \
+	     abi_dispatch::WRITE, abi_dispatch::READ);			     \
+}									     \
+void ITM_REGPARM _ITM_memmove##NAME(void *dst, const void *src, size_t size) \
+{									     \
+  do_memmove ((uintptr_t)dst, (uintptr_t)src, size,			     \
+	      abi_dispatch::WRITE, abi_dispatch::READ);			     \
+}
+
+ITM_MEM_DEF(RnWt,	NOLOCK,		W)
+ITM_MEM_DEF(RnWtaR,	NOLOCK,		WaR)
+ITM_MEM_DEF(RnWtaW,	NOLOCK,		WaW)
+
+ITM_MEM_DEF(RtWn,	R,		NOLOCK)
+ITM_MEM_DEF(RtWt,	R,		W)
+ITM_MEM_DEF(RtWtaR,	R,		WaR)
+ITM_MEM_DEF(RtWtaW,	R,		WaW)
+
+ITM_MEM_DEF(RtaRWn,	RaR,		NOLOCK)
+ITM_MEM_DEF(RtaRWt,	RaR,		W)
+ITM_MEM_DEF(RtaRWtaR,	RaR,		WaR)
+ITM_MEM_DEF(RtaRWtaW,	RaR,		WaW)
+
+ITM_MEM_DEF(RtaWWn,	RaW,		NOLOCK)
+ITM_MEM_DEF(RtaWWt,	RaW,		W)
+ITM_MEM_DEF(RtaWWtaR,	RaW,		WaR)
+ITM_MEM_DEF(RtaWWtaW,	RaW,		WaW)
Index: libitm/local.cc
===================================================================
--- libitm/local.cc	(.../trunk)	(revision 0)
+++ libitm/local.cc	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,129 @@ 
+/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but 
WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or 
FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "libitm_i.h"
+
+namespace GTM HIDDEN {
+
+// One record in the transaction-local undo log: a snapshot of LEN bytes
+// that previously lived at ADDR, restored on rollback.
+struct gtm_undolog_entry
+{
+  void *addr;	// Start of the logged memory region.
+  size_t len;	// Number of bytes saved.
+  char saved[];	// Flexible array member holding the LEN original bytes.
+};
+
+
+// On commit the saved old values are no longer needed: release every
+// log entry and empty the log.
+void
+gtm_thread::commit_undolog ()
+{
+  size_t count = undolog.size();
+
+  if (count == 0)
+    return;
+
+  for (size_t slot = 0; slot < count; ++slot)
+    free (undolog[slot]);
+  this->undolog.clear();
+}
+
+// Roll the undo log back to UNTIL_SIZE entries: pop entries newest-first,
+// restoring each entry's saved bytes to their original address and
+// freeing it.  Entries may be NULL (dropped by drop_references_undolog),
+// in which case there is nothing to restore.
+void
+gtm_thread::rollback_undolog (size_t until_size)
+{
+  size_t i, n = undolog.size();
+
+  if (n > 0)
+    {
+      for (i = n; i-- > until_size; )
+	{
+	  // pop() returns a pointer to the popped slot; dereference to
+	  // get the entry itself.
+	  gtm_undolog_entry *u = *undolog.pop();
+	  if (u)
+	    {
+	      memcpy (u->addr, u->saved, u->len);
+	      free (u);
+	    }
+	}
+    }
+}
+
+/* Forget any references to PTR in the local log.  Entries wholly inside
+   [PTR, PTR + LEN) are freed and their slots NULLed so a later rollback
+   will not write saved bytes back into that memory.  */
+
+void
+gtm_thread::drop_references_undolog (const void *ptr, size_t len)
+{
+  size_t i, n = undolog.size();
+
+  if (n > 0)
+    {
+      /* Walk newest-to-oldest.  Valid indices are [0, N-1]; the original
+	 loop used I in [1, N], reading one past the end and skipping
+	 element 0.  */
+      for (i = n; i-- > 0; )
+	{
+	  gtm_undolog_entry *u = undolog[i];
+	  /* Slots already dropped are NULL; skip them (rollback_undolog
+	     makes the same check).  */
+	  if (u == NULL)
+	    continue;
+	  /* ?? Do we need such granularity, or can we get away with
+	     just comparing PTR and LEN. ??  */
+	  if ((const char *)u->addr >= (const char *)ptr
+	      && ((const char *)u->addr + u->len <= (const char *)ptr + len))
+	    {
+	      free (u);
+	      undolog[i] = NULL;
+	    }
+	}
+    }
+}
+
+// Log LEN bytes at PTR into the current thread's undo log so they can be
+// restored on rollback (backing implementation of _ITM_LB).
+void ITM_REGPARM
+GTM_LB (const void *ptr, size_t len)
+{
+  gtm_thread *tx = gtm_thr();
+  gtm_undolog_entry *undo;
+
+  // Header and payload are allocated as a single chunk via the
+  // flexible array member SAVED.
+  undo = (gtm_undolog_entry *)
+      xmalloc (sizeof (struct gtm_undolog_entry) + len);
+  undo->addr = (void *) ptr;
+  undo->len = len;
+
+  // push() returns a pointer to the new slot; store the entry there.
+  tx->undolog.push()[0] = undo;
+
+  memcpy (undo->saved, ptr, len);
+}
+
+} // namespace GTM
+
+using namespace GTM;
+
+// The ABI-visible _ITM_LB is an alias for GTM_LB above.
+void _ITM_LB (const void *ptr, size_t len) ITM_REGPARM
+	__attribute__((alias("GTM_LB")));
+
+// Typed logging entry points (_ITM_LU1 .. _ITM_LCE): each logs the bytes
+// of a single object of the corresponding ABI type through GTM_LB.
+#define ITM_LOG_DEF(T) \
+void ITM_REGPARM _ITM_L##T (const _ITM_TYPE_##T *ptr) \
+{ GTM_LB (ptr, sizeof (*ptr)); }
+
+ITM_LOG_DEF(U1)
+ITM_LOG_DEF(U2)
+ITM_LOG_DEF(U4)
+ITM_LOG_DEF(U8)
+ITM_LOG_DEF(F)
+ITM_LOG_DEF(D)
+ITM_LOG_DEF(E)
+ITM_LOG_DEF(CF)
+ITM_LOG_DEF(CD)
+ITM_LOG_DEF(CE)
Index: libitm/libitm_i.h
===================================================================
--- libitm/libitm_i.h	(.../trunk)	(revision 0)
+++ libitm/libitm_i.h	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,302 @@ 
+/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* The following are internal implementation functions and definitions.
+   To distinguish them from those defined by the Intel ABI, they all
+   begin with GTM/gtm.  */
+
+#ifndef LIBITM_I_H
+#define LIBITM_I_H 1
+
+#include "libitm.h"
+#include "config.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <unwind.h>
+#include <type_traits>
+
+#include "common.h"
+
+namespace GTM HIDDEN {
+
+using namespace std;
+
+// A helper template for accessing an unsigned integral of SIZE bytes.
+template<size_t SIZE> struct sized_integral { };
+template<> struct sized_integral<1> { typedef uint8_t type; };
+template<> struct sized_integral<2> { typedef uint16_t type; };
+template<> struct sized_integral<4> { typedef uint32_t type; };
+template<> struct sized_integral<8> { typedef uint64_t type; };
+
+// An unsigned integer as wide as a machine word (attribute mode(word)).
+typedef unsigned int gtm_word __attribute__((mode (word)));
+
+// These values are given to GTM_restart_transaction and indicate the
+// reason for the restart.  The reason is used to decide what STM
+// implementation should be used during the next iteration.
+enum gtm_restart_reason
+{
+  RESTART_REALLOCATE,
+  RESTART_LOCKED_READ,
+  RESTART_LOCKED_WRITE,
+  RESTART_VALIDATE_READ,
+  RESTART_VALIDATE_WRITE,
+  RESTART_VALIDATE_COMMIT,
+  RESTART_SERIAL_IRR,
+  RESTART_NOT_READONLY,
+  RESTART_CLOSED_NESTING,
+  RESTART_INIT_METHOD_GROUP,
+  NUM_RESTARTS,
+  // Sentinel: no restart is required.
+  NO_RESTART = NUM_RESTARTS
+};
+
+} // namespace GTM
+
+#include "target.h"
+#include "rwlock.h"
+#include "aatree.h"
+#include "cacheline.h"
+#include "cachepage.h"
+#include "stmlock.h"
+#include "dispatch.h"
+#include "containers.h"
+
+namespace GTM HIDDEN {
+
+// This type is private to alloc.c, but needs to be defined so that
+// the template used inside gtm_thread can instantiate.
+struct gtm_alloc_action
+{
+  // Callback used to release the block (e.g. free).
+  void (*free_fn)(void *);
+  // NOTE(review): presumably records whether the block was allocated
+  // (as opposed to freed) inside the transaction -- confirm against
+  // alloc.cc.
+  bool allocated;
+};
+
+// This type is private to local.c.
+struct gtm_undolog_entry;
+
+struct gtm_thread;
+
+// A transaction checkpoint: data that has to be saved and restored when doing
+// closed nesting.
+struct gtm_transaction_cp
+{
+  gtm_jmpbuf jb;
+  // Undo-log size when the checkpoint was taken; lets a nested rollback
+  // undo only the entries added since (cf. rollback_undolog(until_size)).
+  size_t undolog_size;
+  aa_tree<uintptr_t, gtm_alloc_action> alloc_actions;
+  size_t user_actions_size;
+  _ITM_transactionId_t id;
+  uint32_t prop;
+  uint32_t cxa_catch_count;
+  void *cxa_unthrown;
+  // We might want to use a different but compatible dispatch method for
+  // a nested transaction.
+  abi_dispatch *disp;
+  // Nesting level of this checkpoint (1 means that this is a checkpoint of
+  // the outermost transaction).
+  uint32_t nesting;
+
+  // Capture the checkpointed state from TX / fold it back on commit.
+  void save(gtm_thread* tx);
+  void commit(gtm_thread* tx);
+};
+
+// Contains all thread-specific data required by the entire library.
+// This includes all data relevant to a single transaction. Because most
+// thread-specific data is about the current transaction, we also refer to
+// the transaction-specific parts of gtm_thread as "the transaction" (the
+// same applies to names of variables and arguments).
+// All but the shared part of this data structure are thread-local data.
+// gtm_thread could be split into transaction-specific structures and other
+// per-thread data (with those parts then nested in gtm_thread), but this
+// would make it harder to later rearrange individual members to optimize data
+// accesses. Thus, for now we keep one flat object, and will only split it if
+// the code gets too messy.
+struct gtm_thread
+{
+
+  // A user-registered commit/abort callback (see useraction.cc).
+  struct user_action
+  {
+    _ITM_userCommitFunction fn;
+    void *arg;
+    bool on_commit;
+    _ITM_transactionId_t resuming_id;
+  };
+
+  // The jump buffer by which GTM_longjmp restarts the transaction.
+  // This field *must* be at the beginning of the transaction.
+  gtm_jmpbuf jb;
+
+  // Data used by local.c for the undo log for both local and shared memory.
+  vector<gtm_undolog_entry*> undolog;
+
+  // Data used by alloc.c for the malloc/free undo log.
+  aa_tree<uintptr_t, gtm_alloc_action> alloc_actions;
+
+  // Data used by useraction.c for the user-defined commit/abort handlers.
+  vector<user_action> user_actions;
+
+  // A numerical identifier for this transaction.
+  _ITM_transactionId_t id;
+
+  // The _ITM_codeProperties of this transaction as given by the compiler.
+  uint32_t prop;
+
+  // The nesting depth for subsequently started transactions. This variable
+  // will be set to 1 when starting an outermost transaction.
+  uint32_t nesting;
+
+  // Set if this transaction owns the serial write lock.
+  // Can be reset only when restarting the outermost transaction.
+  static const uint32_t STATE_SERIAL		= 0x0001;
+  // Set if the serial-irrevocable dispatch table is installed.
+  // Implies that no logging is being done, and abort is not possible.
+  // Can be reset only when restarting the outermost transaction.
+  static const uint32_t STATE_IRREVOCABLE	= 0x0002;
+
+  // A bitmask of the above.
+  uint32_t state;
+
+  // In order to reduce cacheline contention on global_tid during
+  // beginTransaction, we allocate a block of 2**N ids to the thread
+  // all at once.  This number is the next value to be allocated from
+  // the block, or 0 % 2**N if no such block is allocated.
+  _ITM_transactionId_t local_tid;
+
+  // Data used by eh_cpp.c for managing exceptions within the transaction.
+  uint32_t cxa_catch_count;
+  void *cxa_unthrown;
+  void *eh_in_flight;
+
+  // Checkpoints for closed nesting.
+  vector<gtm_transaction_cp> parent_txns;
+
+  // Data used by retry.c for deciding what STM implementation should
+  // be used for the next iteration of the transaction.
+  // Only restart_total is reset to zero when the transaction commits, the
+  // other counters are total values for all previously executed transactions.
+  uint32_t restart_reason[NUM_RESTARTS];
+  uint32_t restart_total;
+
+  // *** The shared part of gtm_thread starts here. ***
+  // Shared state is on separate cachelines to avoid false sharing with
+  // thread-local parts of gtm_thread.
+
+  // Points to the next thread in the list of all threads.
+  gtm_thread *next_thread __attribute__((__aligned__(HW_CACHELINE_SIZE)));
+
+  // If this transaction is inactive, shared_state is ~0. Otherwise, this is
+  // an active or serial transaction.
+  gtm_word shared_state;
+
+  // The lock that provides access to serial mode.  Non-serialized
+  // transactions acquire read locks; a serialized transaction acquires
+  // a write lock.
+  static gtm_rwlock serial_lock;
+
+  // The head of the list of all threads' transactions.
+  static gtm_thread *list_of_threads;
+  // The number of all registered threads.
+  static unsigned number_of_threads;
+
+  // In alloc.cc
+  void commit_allocations (bool, aa_tree<uintptr_t, gtm_alloc_action>*);
+  void record_allocation (void *, void (*)(void *));
+  void forget_allocation (void *, void (*)(void *));
+  void drop_references_allocations (const void *ptr)
+  {
+    this->alloc_actions.erase((uintptr_t) ptr);
+  }
+
+  // In beginend.cc
+  void rollback (gtm_transaction_cp *cp = 0, bool aborting = false);
+  bool trycommit ();
+  void restart (gtm_restart_reason) ITM_NORETURN;
+
+  gtm_thread();
+  ~gtm_thread();
+
+  static void *operator new(size_t);
+  static void operator delete(void *);
+
+  // Invoked from assembly language, thus the "asm" specifier on
+  // the name, avoiding complex name mangling.
+  static uint32_t begin_transaction(uint32_t, const gtm_jmpbuf *)
+	__asm__("GTM_begin_transaction") ITM_REGPARM;
+
+  // In eh_cpp.cc
+  void revert_cpp_exceptions (gtm_transaction_cp *cp = 0);
+
+  // In local.cc
+  void commit_undolog (void);
+  void rollback_undolog (size_t until_size = 0);
+  void drop_references_undolog (const void *, size_t);
+
+  // In retry.cc
+  // Must be called outside of transactions (i.e., after rollback).
+  void decide_retry_strategy (gtm_restart_reason);
+  abi_dispatch* decide_begin_dispatch (uint32_t prop);
+  void number_of_threads_changed(unsigned previous, unsigned now);
+  // Must be called from serial mode. Does not call set_abi_disp().
+  void set_default_dispatch(abi_dispatch* disp);
+
+  // In method-serial.cc
+  void serialirr_mode ();
+
+  // In useraction.cc
+  void rollback_user_actions (size_t until_size = 0);
+  void commit_user_actions ();
+};
+
+} // namespace GTM
+
+#include "tls.h"
+
+namespace GTM HIDDEN {
+
+// An unscaled count of the number of times we should spin attempting to
+// acquire locks before we block the current thread and defer to the OS.
+// This variable isn't used when the standard POSIX lock implementations
+// are used.
+extern uint64_t gtm_spin_count_var;
+
+extern "C" uint32_t GTM_longjmp (const gtm_jmpbuf *, uint32_t, uint32_t)
+	ITM_NORETURN ITM_REGPARM;
+
+extern "C" void GTM_LB (const void *, size_t) ITM_REGPARM;
+
+extern void GTM_error (const char *fmt, ...)
+	__attribute__((format (printf, 1, 2)));
+extern void GTM_fatal (const char *fmt, ...)
+	__attribute__((noreturn, format (printf, 1, 2)));
+
+extern abi_dispatch *dispatch_serial();
+extern abi_dispatch *dispatch_serialirr();
+extern abi_dispatch *dispatch_serialirr_onwrite();
+extern abi_dispatch *dispatch_gl_wt();
+
+extern gtm_cacheline_mask gtm_mask_stack(gtm_cacheline *, gtm_cacheline_mask);
+
+} // namespace GTM
+
+#endif // LIBITM_I_H
Index: libitm/common.h
===================================================================
--- libitm/common.h	(.../trunk)	(revision 0)
+++ libitm/common.h	(.../branches/transactional-memory)	(revision 180773)
@@ -0,0 +1,63 @@ 
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+   Contributed by Richard Henderson <rth@redhat.com>.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* The following are internal implementation functions and definitions.
+   To distinguish them from those defined by the Intel ABI, they all
+   begin with GTM/gtm.  */
+
+#ifndef COMMON_H
+#define COMMON_H 1
+
+// Attribute shorthands; HIDDEN collapses to nothing when the toolchain
+// lacks visibility support.
+#define UNUSED		__attribute__((unused))
+#define ALWAYS_INLINE	__attribute__((always_inline))
+#ifdef HAVE_ATTRIBUTE_VISIBILITY
+# define HIDDEN		__attribute__((visibility("hidden")))
+#else
+# define HIDDEN
+#endif
+
+// Branch-prediction hints.
+// NOTE(review): likely() normalizes its argument with "!= 0" but
+// unlikely() passes it through unchanged; harmless for boolean
+// expressions, but consider making the two symmetric.
+#define likely(X)	__builtin_expect((X) != 0, 1)
+#define unlikely(X)	__builtin_expect((X), 0)
+
+namespace GTM HIDDEN {
+
+// Locally defined protected allocation functions.
+//
+// To avoid dependency on libstdc++ new/delete, as well as to not
+// interfere with the wrapping of the global new/delete we wrap for
+// the user in alloc_cpp.cc, use class-local versions that defer
+// to malloc/free.  Recall that operator new/delete does not go through
+// normal lookup and so we cannot simply inject a version into the
+// GTM namespace.
+// If separate_cl is true, the allocator will try to return memory that is on
+// cache lines that are not shared with any object used by another thread.
+extern void * xmalloc (size_t s, bool separate_cl = false)
+  __attribute__((malloc, nothrow));
+extern void * xrealloc (void *p, size_t s, bool separate_cl = false)
+  __attribute__((malloc, nothrow));
+
+} // namespace GTM