From patchwork Thu Nov 3 17:49:28 2011
X-Patchwork-Submitter: Aldy Hernandez
X-Patchwork-Id: 123476
Message-ID: <4EB2D428.8000906@redhat.com>
Date: Thu, 03 Nov 2011 12:49:28 -0500
From: Aldy Hernandez
To: gcc-patches
Subject: [patch] 3/n: trans-mem: runtime

Index: libitm/method-wbetl.cc =================================================================== --- libitm/method-wbetl.cc (.../trunk) (revision 0) +++ libitm/method-wbetl.cc (.../branches/transactional-memory) (revision 180773) @@ -0,0 +1,628 @@ +/* Copyright (C) 2009 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + .
*/ + +#include "libitm_i.h" + +namespace { + +using namespace GTM; + +class wbetl_dispatch : public abi_dispatch +{ + private: + static const size_t RW_SET_SIZE = 4096; + + struct r_entry + { + gtm_version version; + gtm_stmlock *lock; + }; + + r_entry *m_rset_entries; + size_t m_rset_nb_entries; + size_t m_rset_size; + + struct w_entry + { + /* There's a hashtable where the locks are held, so multiple + cachelines can hash to a given bucket. This link points to the + possible next cacheline that also hashes to this bucket. */ + struct w_entry *next; + + /* Every entry in this bucket (accessed by NEXT) has the same LOCK + address below. */ + gtm_stmlock *lock; + + gtm_cacheline *addr; + gtm_cacheline *value; + gtm_version version; + }; + + w_entry *m_wset_entries; + size_t m_wset_nb_entries; + size_t m_wset_size; + bool m_wset_reallocate; + + gtm_version m_start; + gtm_version m_end; + + gtm_cacheline_page *m_cache_page; + unsigned m_n_cache_page; + + private: + bool local_w_entry_p (w_entry *w); + bool has_read (gtm_stmlock *lock); + bool validate(); + bool extend(); + + gtm_cacheline *do_write_lock(gtm_cacheline *); + gtm_cacheline *do_after_write_lock(gtm_cacheline *); + const gtm_cacheline *do_read_lock(const gtm_cacheline *, bool); + + public: + wbetl_dispatch(); + + virtual const gtm_cacheline *read_lock(const gtm_cacheline *, ls_modifier); + virtual mask_pair write_lock(gtm_cacheline *, ls_modifier); + + virtual bool trycommit(); + virtual void rollback(); + virtual void reinit(); + virtual void fini(); + virtual bool trydropreference (void *, size_t); +}; + +/* Check if W is one of our write locks. */ + +inline bool +wbetl_dispatch::local_w_entry_p (w_entry *w) +{ + return (m_wset_entries <= w && w < m_wset_entries + m_wset_nb_entries); +} + +/* Check if stripe has been read previously. */ + +inline bool +wbetl_dispatch::has_read (gtm_stmlock *lock) +{ + // ??? Consider using an AA tree to lookup the r_set entries. + size_t n = m_rset_nb_entries; + for (size_t i = 0; i < n; ++i) + if (m_rset_entries[i].lock == lock) + return true; + + return false; +} + +/* Validate read set, i.e. check if all read addresses are still valid now. */ + +bool +wbetl_dispatch::validate () +{ + __sync_synchronize (); + + size_t n = m_rset_nb_entries; + for (size_t i = 0; i < n; ++i) + { + r_entry *r = &m_rset_entries[i]; + gtm_stmlock l = *r->lock; + + if (gtm_stmlock_owned_p (l)) + { + w_entry *w = (w_entry *) gtm_stmlock_get_addr (l); + + // If someone has locked us, it better be by someone in the + // current thread. + if (!local_w_entry_p (w)) + return false; + } + else if (gtm_stmlock_get_version (l) != r->version) + return false; + } + + return true; +} + +/* Extend the snapshot range. */ + +bool +wbetl_dispatch::extend () +{ + gtm_version now = gtm_get_clock (); + + if (validate ()) + { + m_end = now; + return true; + } + return false; +} + +/* Acquire a write lock on ADDR. */ + +gtm_cacheline * +wbetl_dispatch::do_write_lock(gtm_cacheline *addr) +{ + gtm_stmlock *lock; + gtm_stmlock l, l2; + gtm_version version; + w_entry *w, *prev = NULL; + + lock = gtm_get_stmlock (addr); + l = *lock; + + restart_no_load: + if (gtm_stmlock_owned_p (l)) + { + w = (w_entry *) gtm_stmlock_get_addr (l); + + /* Did we previously write the same address? */ + if (local_w_entry_p (w)) + { + prev = w; + while (1) + { + if (addr == prev->addr) + return prev->value; + if (prev->next == NULL) + break; + prev = prev->next; + } + + /* Get version from previous entry write set. 
*/ + version = prev->version; + + /* If there's not enough entries, we must reallocate the array, + which invalidates all pointers to write set entries, which + means we have to restart the transaction. */ + if (m_wset_nb_entries == m_wset_size) + { + m_wset_size *= 2; + m_wset_reallocate = true; + gtm_tx()->restart (RESTART_REALLOCATE); + } + + w = &m_wset_entries[m_wset_nb_entries]; + goto do_write; + } + + gtm_tx()->restart (RESTART_LOCKED_WRITE); + } + else + { + version = gtm_stmlock_get_version (l); + + /* We might have read an older version previously. */ + if (version > m_end) + { + if (has_read (lock)) + gtm_tx()->restart (RESTART_VALIDATE_WRITE); + } + + /* Extend write set, aborting to reallocate write set entries. */ + if (m_wset_nb_entries == m_wset_size) + { + m_wset_size *= 2; + m_wset_reallocate = true; + gtm_tx()->restart (RESTART_REALLOCATE); + } + + /* Acquire the lock. */ + w = &m_wset_entries[m_wset_nb_entries]; + l2 = gtm_stmlock_set_owned (w); + l = __sync_val_compare_and_swap (lock, l, l2); + if (l != l2) + goto restart_no_load; + } + + do_write: + m_wset_nb_entries++; + if (prev != NULL) + prev->next = w; + w->next = 0; + w->lock = lock; + w->addr = addr; + w->version = version; + + gtm_cacheline_page *page = m_cache_page; + unsigned index = m_n_cache_page; + + if (page == NULL || index == gtm_cacheline_page::LINES) + { + gtm_cacheline_page *npage = new gtm_cacheline_page; + npage->prev = page; + m_cache_page = page = npage; + m_n_cache_page = 1; + index = 0; + } + else + m_n_cache_page = index + 1; + + gtm_cacheline *line = &page->lines[index]; + w->value = line; + page->masks[index] = 0; + *line = *addr; + + return line; +} + +gtm_cacheline * +wbetl_dispatch::do_after_write_lock (gtm_cacheline *addr) +{ + gtm_stmlock *lock; + gtm_stmlock l; + w_entry *w; + + lock = gtm_get_stmlock (addr); + l = *lock; + assert (gtm_stmlock_owned_p (l)); + + w = (w_entry *) gtm_stmlock_get_addr (l); + assert (local_w_entry_p (w)); + + while (1) + { + if (addr == w->addr) + return w->value; + w = w->next; + } +} + +/* Acquire a read lock on ADDR. */ + +const gtm_cacheline * +wbetl_dispatch::do_read_lock (const gtm_cacheline *addr, bool after_read) +{ + gtm_stmlock *lock; + gtm_stmlock l, l2; + gtm_version version; + w_entry *w; + + lock = gtm_get_stmlock (addr); + l = *lock; + + restart_no_load: + if (gtm_stmlock_owned_p (l)) + { + w = (w_entry *) gtm_stmlock_get_addr (l); + + /* Did we previously write the same address? */ + if (local_w_entry_p (w)) + { + while (1) + { + if (addr == w->addr) + return w->value; + if (w->next == NULL) + return addr; + w = w->next; + } + } + + gtm_tx()->restart (RESTART_LOCKED_READ); + } + + version = gtm_stmlock_get_version (l); + + /* If version is no longer valid, re-validate the read set. */ + if (version > m_end) + { + if (!extend ()) + gtm_tx()->restart (RESTART_VALIDATE_READ); + + if (!after_read) + { + // Verify that the version has not yet been overwritten. The read + // value has not yet been added to read set and may not have been + // checked during the extend. + // + // ??? This only makes sense if we're actually reading the value + // and returning it now -- which I believe the original TinySTM + // did. This doesn't make a whole lot of sense when we're + // manipulating cachelines as we are now. Do we need some other + // form of lock verification here, or is the validate call in + // trycommit sufficient? 
+ + __sync_synchronize (); + l2 = *lock; + if (l != l2) + { + l = l2; + goto restart_no_load; + } + } + } + + if (!after_read) + { + r_entry *r; + + /* Add the address and version to the read set. */ + if (m_rset_nb_entries == m_rset_size) + { + m_rset_size *= 2; + + m_rset_entries = (r_entry *) + xrealloc (m_rset_entries, m_rset_size * sizeof(r_entry)); + } + r = &m_rset_entries[m_rset_nb_entries++]; + r->version = version; + r->lock = lock; + } + + return addr; +} + +const gtm_cacheline * +wbetl_dispatch::read_lock (const gtm_cacheline *addr, ls_modifier ltype) +{ + switch (ltype) + { + case NONTXNAL: + return addr; + case R: + return do_read_lock (addr, false); + case RaR: + return do_read_lock (addr, true); + case RaW: + return do_after_write_lock (const_cast(addr)); + case RfW: + return do_write_lock (const_cast(addr)); + default: + abort (); + } +} + +abi_dispatch::mask_pair +wbetl_dispatch::write_lock (gtm_cacheline *addr, ls_modifier ltype) +{ + gtm_cacheline *line; + + switch (ltype) + { + case NONTXNAL: + return mask_pair (addr, &mask_sink); + case W: + case WaR: + line = do_write_lock (addr); + break; + case WaW: + line = do_after_write_lock (addr); + break; + default: + abort (); + } + + return mask_pair (line, gtm_cacheline_page::mask_for_page_line (line)); +} + +/* Commit the transaction. */ + +bool +wbetl_dispatch::trycommit () +{ + const size_t n = m_wset_nb_entries; + if (n != 0) + { + /* Get commit timestamp. */ + gtm_version t = gtm_inc_clock (); + + /* Validate only if a concurrent transaction has started since. */ + if (m_start != t - 1 && !validate ()) + return false; + + /* Install new versions. */ + for (size_t i = 0; i < n; ++i) + { + w_entry *w = &m_wset_entries[i]; + gtm_cacheline_mask mask + = *gtm_cacheline_page::mask_for_page_line (w->value); + + /* Filter out any updates that overlap the libitm stack. */ + mask = gtm_mask_stack (w->addr, mask); + + gtm_cacheline::copy_mask (w->addr, w->value, mask); + } + + /* Only emit barrier after all cachelines are copied. */ + gtm_cacheline::copy_mask_wb (); + + /* Drop locks. */ + for (size_t i = 0; i < n; ++i) + { + w_entry *w = &m_wset_entries[i]; + + /* Every link along the chain has the same lock, but only + bother dropping the lock once per bucket (at the end). */ + if (w->next == NULL) + *w->lock = gtm_stmlock_set_version (t); + } + } + + __sync_synchronize (); + return true; +} + +void +wbetl_dispatch::rollback () +{ + /* Drop locks. */ + const size_t n = m_wset_nb_entries; + for (size_t i = 0; i < n; ++i) + { + w_entry *w = &m_wset_entries[i]; + + /* Every link along the chain has the same lock, but only + bother dropping the lock once per bucket (at the end). */ + if (w->next == NULL) + *w->lock = gtm_stmlock_set_version (w->version); + } + + __sync_synchronize (); +} + +void +wbetl_dispatch::reinit () +{ + gtm_cacheline_page *page; + + m_rset_nb_entries = 0; + m_wset_nb_entries = 0; + + if (m_wset_reallocate) + { + m_wset_reallocate = 0; + m_wset_entries = (w_entry *) + xrealloc (m_wset_entries, m_wset_size * sizeof(w_entry)); + } + + page = m_cache_page; + if (page) + { + /* Release all but one of the pages of cachelines. */ + gtm_cacheline_page *prev = page->prev; + if (prev) + { + page->prev = 0; + delete prev; + } + + /* Start the next cacheline allocation from the beginning. 
*/ + m_n_cache_page = 0; + } + + m_start = m_end = gtm_get_clock (); +} + +void +wbetl_dispatch::fini () +{ + delete m_cache_page; + free (m_rset_entries); + free (m_wset_entries); + delete this; +} + +/* Attempt to drop any internal references to PTR. Return TRUE if successful. + + This is an adaptation of the transactional memcpy function. + + What we do here is flush out the current transactional content of + PTR to real memory, and remove the write mask bits associated with + it so future commits will ignore this piece of memory. */ + +bool +wbetl_dispatch::trydropreference (void *ptr, size_t size) +{ + if (size == 0) + return true; + + if (!validate ()) + return false; + + uintptr_t isrc = (uintptr_t)ptr; + // The position in the source cacheline where *PTR starts. + uintptr_t sofs = isrc & (CACHELINE_SIZE - 1); + gtm_cacheline *src + = reinterpret_cast(isrc & -CACHELINE_SIZE); + unsigned char *dst = (unsigned char *)ptr; + abi_dispatch::mask_pair pair; + + // If we're trying to drop a reference, we should already have a + // write lock on it. If we don't have one, there's no work to do. + if (!gtm_stmlock_owned_p (*gtm_get_stmlock (src))) + return true; + + // We copy the data in three stages: + + // (a) Copy stray bytes at the beginning that are smaller than a + // cacheline. + if (sofs != 0) + { + size_t sleft = CACHELINE_SIZE - sofs; + size_t min = (size <= sleft ? size : sleft); + + // WaW will give us the current locked entry. + pair = this->write_lock (src, WaW); + + // *jedi mind wave*...these aren't the droids you're looking for. + *pair.mask &= ~((((gtm_cacheline_mask)1 << min) - 1) << sofs); + + memcpy (dst, &pair.line->b[sofs], min); + dst += min; + src++; + size -= min; + } + + // (b) Copy subsequent cacheline sized chunks. + while (size >= CACHELINE_SIZE) + { + pair = this->write_lock(src, WaW); + *pair.mask = 0; + memcpy (dst, pair.line, CACHELINE_SIZE); + dst += CACHELINE_SIZE; + src++; + size -= CACHELINE_SIZE; + } + + // (c) Copy anything left over. + if (size != 0) + { + pair = this->write_lock(src, WaW); + *pair.mask &= ~(((gtm_cacheline_mask)1 << size) - 1); + memcpy (dst, pair.line, size); + } + + // No need to drop locks, since we're going to abort the transaction + // anyhow. + + return true; +} + + +wbetl_dispatch::wbetl_dispatch () + : abi_dispatch (false, false) +{ + m_rset_entries = (r_entry *) xmalloc (RW_SET_SIZE * sizeof(r_entry)); + m_rset_nb_entries = 0; + m_rset_size = RW_SET_SIZE; + + m_wset_entries = (w_entry *) xmalloc (RW_SET_SIZE * sizeof(w_entry)); + m_wset_nb_entries = 0; + m_wset_size = RW_SET_SIZE; + m_wset_reallocate = false; + + m_start = m_end = gtm_get_clock (); + + m_cache_page = 0; + m_n_cache_page = 0; +} + +} // anon namespace + +abi_dispatch * +GTM::dispatch_wbetl () +{ + return new wbetl_dispatch (); +} Index: libitm/alloc_c.cc =================================================================== --- libitm/alloc_c.cc (.../trunk) (revision 0) +++ libitm/alloc_c.cc (.../branches/transactional-memory) (revision 180773) @@ -0,0 +1,72 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include "libitm_i.h" + + +using namespace GTM; + +extern "C" { + +/* Wrap: malloc (size_t sz) */ +void * +_ITM_malloc (size_t sz) +{ + void *r = malloc (sz); + if (r) + gtm_thr()->record_allocation (r, free); + return r; +} + +/* Wrap: calloc (size_t nm, size_t sz) */ +void * +_ITM_calloc (size_t nm, size_t sz) +{ + void *r = calloc (nm, sz); + if (r) + gtm_thr()->record_allocation (r, free); + return r; +} + +/* Wrap: free (void *ptr) */ +void +_ITM_free (void *ptr) +{ + if (ptr) + gtm_thr()->forget_allocation (ptr, free); +} + +/* Forget any internal references to PTR. */ + +__attribute__((transaction_pure)) +void ITM_REGPARM +_ITM_dropReferences (void *ptr, size_t len) +{ + // The semantics of _ITM_dropReferences are not sufficiently defined in the + // ABI specification, so it does not make sense to support it right now. See + // the libitm documentation for details. + GTM_fatal("_ITM_dropReferences is not supported"); +} + +} // extern "C" Index: libitm/clone.cc =================================================================== --- libitm/clone.cc (.../trunk) (revision 0) +++ libitm/clone.cc (.../branches/transactional-memory) (revision 180773) @@ -0,0 +1,184 @@ +/* Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include "libitm_i.h" + +using namespace GTM; + +struct clone_entry +{ + void *orig, *clone; +}; + +struct clone_table +{ + clone_entry *table; + size_t size; + clone_table *next; +}; + +static clone_table *all_tables; + +static void * +find_clone (void *ptr) +{ + clone_table *table; + + for (table = all_tables; table ; table = table->next) + { + clone_entry *t = table->table; + size_t lo = 0, hi = table->size, i; + + /* Quick test for whether PTR is present in this table. */ + if (ptr < t[0].orig || ptr > t[hi - 1].orig) + continue; + + /* Otherwise binary search. 
*/ + while (lo < hi) + { + i = (lo + hi) / 2; + if (ptr < t[i].orig) + hi = i; + else if (ptr > t[i].orig) + lo = i + 1; + else + return t[i].clone; + } + + /* Given the quick test above, if we don't find the entry in + this table then it doesn't exist. */ + break; + } + + return NULL; +} + + +void * ITM_REGPARM +_ITM_getTMCloneOrIrrevocable (void *ptr) +{ + void *ret = find_clone (ptr); + if (ret) + return ret; + + gtm_thr()->serialirr_mode (); + + return ptr; +} + +void * ITM_REGPARM +_ITM_getTMCloneSafe (void *ptr) +{ + void *ret = find_clone (ptr); + if (ret == NULL) + abort (); + return ret; +} + +static int +clone_entry_compare (const void *a, const void *b) +{ + const clone_entry *aa = (const clone_entry *)a; + const clone_entry *bb = (const clone_entry *)b; + + if (aa->orig < bb->orig) + return -1; + else if (aa->orig > bb->orig) + return 1; + else + return 0; +} + +namespace { + +// Within find_clone, we know that we are inside a transaction. Because +// of that, we have already synchronized with serial_lock. By taking the +// serial_lock for write, we exclude all transactions while we make this +// change to the clone tables, without having to synchronize on a separate +// lock. Do be careful not to attempt a recursive write lock. + +class ExcludeTransaction +{ + bool do_lock; + + public: + ExcludeTransaction() + { + gtm_thread *tx = gtm_thr(); + do_lock = !(tx && (tx->state & gtm_thread::STATE_SERIAL)); + + if (do_lock) + gtm_thread::serial_lock.write_lock (); + } + + ~ExcludeTransaction() + { + if (do_lock) + gtm_thread::serial_lock.write_unlock (); + } +}; + +} // end anon namespace + + +void +_ITM_registerTMCloneTable (void *xent, size_t size) +{ + clone_entry *ent = static_cast(xent); + clone_table *table; + + table = (clone_table *) xmalloc (sizeof (clone_table)); + table->table = ent; + table->size = size; + + qsort (ent, size, sizeof (clone_entry), clone_entry_compare); + + // Hold the serial_lock while we update the ALL_TABLES datastructure. + { + ExcludeTransaction exclude; + table->next = all_tables; + all_tables = table; + } +} + +void +_ITM_deregisterTMCloneTable (void *xent) +{ + clone_entry *ent = static_cast(xent); + clone_table *tab; + + // Hold the serial_lock while we update the ALL_TABLES datastructure. + { + ExcludeTransaction exclude; + clone_table **pprev; + + for (pprev = &all_tables; + tab = *pprev, tab->table != ent; + pprev = &tab->next) + continue; + *pprev = tab->next; + } + + free (tab); +} Index: libitm/dispatch.h =================================================================== --- libitm/dispatch.h (.../trunk) (revision 0) +++ libitm/dispatch.h (.../branches/transactional-memory) (revision 180773) @@ -0,0 +1,334 @@ +/* Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Torvald Riegel . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef DISPATCH_H +#define DISPATCH_H 1 + +#include "libitm.h" +#include "common.h" + +// Creates ABI load/store methods (can be made virtual or static using M, +// use M2 to create separate methods names for virtual and static) +// The _PV variants are for the pure-virtual methods in the base class. +#define ITM_READ_M(T, LSMOD, M, M2) \ + M _ITM_TYPE_##T ITM_REGPARM ITM_##LSMOD##T##M2 (const _ITM_TYPE_##T *ptr) \ + { \ + return load(ptr, abi_dispatch::LSMOD); \ + } + +#define ITM_READ_M_PV(T, LSMOD, M, M2) \ + M _ITM_TYPE_##T ITM_REGPARM ITM_##LSMOD##T##M2 (const _ITM_TYPE_##T *ptr) \ + = 0; + +#define ITM_WRITE_M(T, LSMOD, M, M2) \ + M void ITM_REGPARM ITM_##LSMOD##T##M2 (_ITM_TYPE_##T *ptr, \ + _ITM_TYPE_##T val) \ + { \ + store(ptr, val, abi_dispatch::LSMOD); \ + } + +#define ITM_WRITE_M_PV(T, LSMOD, M, M2) \ + M void ITM_REGPARM ITM_##LSMOD##T##M2 (_ITM_TYPE_##T *ptr, \ + _ITM_TYPE_##T val) \ + = 0; + +// Creates ABI load/store methods for all load/store modifiers for a particular +// type. +#define CREATE_DISPATCH_METHODS_T(T, M, M2) \ + ITM_READ_M(T, R, M, M2) \ + ITM_READ_M(T, RaR, M, M2) \ + ITM_READ_M(T, RaW, M, M2) \ + ITM_READ_M(T, RfW, M, M2) \ + ITM_WRITE_M(T, W, M, M2) \ + ITM_WRITE_M(T, WaR, M, M2) \ + ITM_WRITE_M(T, WaW, M, M2) +#define CREATE_DISPATCH_METHODS_T_PV(T, M, M2) \ + ITM_READ_M_PV(T, R, M, M2) \ + ITM_READ_M_PV(T, RaR, M, M2) \ + ITM_READ_M_PV(T, RaW, M, M2) \ + ITM_READ_M_PV(T, RfW, M, M2) \ + ITM_WRITE_M_PV(T, W, M, M2) \ + ITM_WRITE_M_PV(T, WaR, M, M2) \ + ITM_WRITE_M_PV(T, WaW, M, M2) + +// Creates ABI load/store methods for all types. +// See CREATE_DISPATCH_FUNCTIONS for comments. +#define CREATE_DISPATCH_METHODS(M, M2) \ + CREATE_DISPATCH_METHODS_T (U1, M, M2) \ + CREATE_DISPATCH_METHODS_T (U2, M, M2) \ + CREATE_DISPATCH_METHODS_T (U4, M, M2) \ + CREATE_DISPATCH_METHODS_T (U8, M, M2) \ + CREATE_DISPATCH_METHODS_T (F, M, M2) \ + CREATE_DISPATCH_METHODS_T (D, M, M2) \ + CREATE_DISPATCH_METHODS_T (E, M, M2) \ + CREATE_DISPATCH_METHODS_T (CF, M, M2) \ + CREATE_DISPATCH_METHODS_T (CD, M, M2) \ + CREATE_DISPATCH_METHODS_T (CE, M, M2) +#define CREATE_DISPATCH_METHODS_PV(M, M2) \ + CREATE_DISPATCH_METHODS_T_PV (U1, M, M2) \ + CREATE_DISPATCH_METHODS_T_PV (U2, M, M2) \ + CREATE_DISPATCH_METHODS_T_PV (U4, M, M2) \ + CREATE_DISPATCH_METHODS_T_PV (U8, M, M2) \ + CREATE_DISPATCH_METHODS_T_PV (F, M, M2) \ + CREATE_DISPATCH_METHODS_T_PV (D, M, M2) \ + CREATE_DISPATCH_METHODS_T_PV (E, M, M2) \ + CREATE_DISPATCH_METHODS_T_PV (CF, M, M2) \ + CREATE_DISPATCH_METHODS_T_PV (CD, M, M2) \ + CREATE_DISPATCH_METHODS_T_PV (CE, M, M2) + +// Creates memcpy/memmove/memset methods. 
+#define CREATE_DISPATCH_METHODS_MEM() \ +virtual void memtransfer(void *dst, const void* src, size_t size, \ + bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod) \ +{ \ + memtransfer_static(dst, src, size, may_overlap, dst_mod, src_mod); \ +} \ +virtual void memset(void *dst, int c, size_t size, ls_modifier mod) \ +{ \ + memset_static(dst, c, size, mod); \ +} + +#define CREATE_DISPATCH_METHODS_MEM_PV() \ +virtual void memtransfer(void *dst, const void* src, size_t size, \ + bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod) = 0; \ +virtual void memset(void *dst, int c, size_t size, ls_modifier mod) = 0; + + +// Creates ABI load/store functions that can target either a class or an +// object. +#define ITM_READ(T, LSMOD, TARGET, M2) \ + _ITM_TYPE_##T ITM_REGPARM _ITM_##LSMOD##T (const _ITM_TYPE_##T *ptr) \ + { \ + return TARGET ITM_##LSMOD##T##M2(ptr); \ + } + +#define ITM_WRITE(T, LSMOD, TARGET, M2) \ + void ITM_REGPARM _ITM_##LSMOD##T (_ITM_TYPE_##T *ptr, _ITM_TYPE_##T val) \ + { \ + TARGET ITM_##LSMOD##T##M2(ptr, val); \ + } + +// Creates ABI load/store functions for all load/store modifiers for a +// particular type. +#define CREATE_DISPATCH_FUNCTIONS_T(T, TARGET, M2) \ + ITM_READ(T, R, TARGET, M2) \ + ITM_READ(T, RaR, TARGET, M2) \ + ITM_READ(T, RaW, TARGET, M2) \ + ITM_READ(T, RfW, TARGET, M2) \ + ITM_WRITE(T, W, TARGET, M2) \ + ITM_WRITE(T, WaR, TARGET, M2) \ + ITM_WRITE(T, WaW, TARGET, M2) + +// Creates ABI memcpy/memmove/memset functions. +#define ITM_MEMTRANSFER_DEF(TARGET, M2, NAME, READ, WRITE) \ +void ITM_REGPARM _ITM_memcpy##NAME(void *dst, const void *src, size_t size) \ +{ \ + TARGET memtransfer##M2 (dst, src, size, \ + false, GTM::abi_dispatch::WRITE, GTM::abi_dispatch::READ); \ +} \ +void ITM_REGPARM _ITM_memmove##NAME(void *dst, const void *src, size_t size) \ +{ \ + TARGET memtransfer##M2 (dst, src, size, \ + GTM::abi_dispatch::memmove_overlap_check(dst, src, size, \ + GTM::abi_dispatch::WRITE, GTM::abi_dispatch::READ), \ + GTM::abi_dispatch::WRITE, GTM::abi_dispatch::READ); \ +} + +#define ITM_MEMSET_DEF(TARGET, M2, WRITE) \ +void ITM_REGPARM _ITM_memset##WRITE(void *dst, int c, size_t size) \ +{ \ + TARGET memset##M2 (dst, c, size, GTM::abi_dispatch::WRITE); \ +} \ + + +// ??? The number of virtual methods is large (7*4 for integers, 7*6 for FP, +// 7*3 for vectors). Is the cache footprint so costly that we should go for +// a small table instead (i.e., only have two virtual load/store methods for +// each supported type)? Note that this doesn't affect custom code paths at +// all because these use only direct calls. +// A large cache footprint could especially decrease HTM performance (due +// to HTM capacity). We could add the modifier (RaR etc.) as parameter, which +// would give us just 4*2+6*2+3*2 functions (so we'd just need one line for +// the integer loads/stores), but then the modifier can be checked only at +// runtime. +// For memcpy/memmove/memset, we just have two virtual methods (memtransfer +// and memset). 
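[Editorial illustration, not part of the patch.] To make the macro machinery above easier to follow, instantiating it for the 4-byte unsigned type U4 with the R (plain transactional read) modifier and an empty M2 suffix yields roughly the code below; TARGET stands for whatever expression the instantiation site supplies to reach the current dispatch object.

// Generated inside abi_dispatch (sub)classes by ITM_READ_M(U4, R, virtual, ):
virtual _ITM_TYPE_U4 ITM_REGPARM ITM_RU4 (const _ITM_TYPE_U4 *ptr)
{
  return load(ptr, abi_dispatch::R);
}

// Exported ABI entry point generated by ITM_READ(U4, R, TARGET, ):
_ITM_TYPE_U4 ITM_REGPARM _ITM_RU4 (const _ITM_TYPE_U4 *ptr)
{
  return TARGET ITM_RU4(ptr);
}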
+#define CREATE_DISPATCH_FUNCTIONS(TARGET, M2) \ + CREATE_DISPATCH_FUNCTIONS_T (U1, TARGET, M2) \ + CREATE_DISPATCH_FUNCTIONS_T (U2, TARGET, M2) \ + CREATE_DISPATCH_FUNCTIONS_T (U4, TARGET, M2) \ + CREATE_DISPATCH_FUNCTIONS_T (U8, TARGET, M2) \ + CREATE_DISPATCH_FUNCTIONS_T (F, TARGET, M2) \ + CREATE_DISPATCH_FUNCTIONS_T (D, TARGET, M2) \ + CREATE_DISPATCH_FUNCTIONS_T (E, TARGET, M2) \ + CREATE_DISPATCH_FUNCTIONS_T (CF, TARGET, M2) \ + CREATE_DISPATCH_FUNCTIONS_T (CD, TARGET, M2) \ + CREATE_DISPATCH_FUNCTIONS_T (CE, TARGET, M2) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RnWt, NONTXNAL, W) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RnWtaR, NONTXNAL, WaR) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RnWtaW, NONTXNAL, WaW) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtWn, R, NONTXNAL) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtWt, R, W) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtWtaR, R, WaR) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtWtaW, R, WaW) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtaRWn, RaR, NONTXNAL) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtaRWt, RaR, W) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtaRWtaR, RaR, WaR) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtaRWtaW, RaR, WaW) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtaWWn, RaW, NONTXNAL) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtaWWt, RaW, W) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtaWWtaR, RaW, WaR) \ + ITM_MEMTRANSFER_DEF(TARGET, M2, RtaWWtaW, RaW, WaW) \ + ITM_MEMSET_DEF(TARGET, M2, W) \ + ITM_MEMSET_DEF(TARGET, M2, WaR) \ + ITM_MEMSET_DEF(TARGET, M2, WaW) + + +// Creates ABI load/store functions that delegate to a transactional memcpy. +#define ITM_READ_MEMCPY(T, LSMOD, TARGET, M2) \ + _ITM_TYPE_##T ITM_REGPARM _ITM_##LSMOD##T (const _ITM_TYPE_##T *ptr)\ + { \ + _ITM_TYPE_##T v; \ + TARGET memtransfer##M2(&v, ptr, sizeof(_ITM_TYPE_##T), false, \ + GTM::abi_dispatch::NONTXNAL, GTM::abi_dispatch::LSMOD); \ + return v; \ + } + +#define ITM_WRITE_MEMCPY(T, LSMOD, TARGET, M2) \ + void ITM_REGPARM _ITM_##LSMOD##T (_ITM_TYPE_##T *ptr, _ITM_TYPE_##T val)\ + { \ + TARGET memtransfer##M2(ptr, &val, sizeof(_ITM_TYPE_##T), false, \ + GTM::abi_dispatch::LSMOD, GTM::abi_dispatch::NONTXNAL); \ + } + +#define CREATE_DISPATCH_FUNCTIONS_T_MEMCPY(T, TARGET, M2) \ + ITM_READ_MEMCPY(T, R, TARGET, M2) \ + ITM_READ_MEMCPY(T, RaR, TARGET, M2) \ + ITM_READ_MEMCPY(T, RaW, TARGET, M2) \ + ITM_READ_MEMCPY(T, RfW, TARGET, M2) \ + ITM_WRITE_MEMCPY(T, W, TARGET, M2) \ + ITM_WRITE_MEMCPY(T, WaR, TARGET, M2) \ + ITM_WRITE_MEMCPY(T, WaW, TARGET, M2) + + +namespace GTM HIDDEN { + +struct gtm_transaction_cp; + +struct method_group +{ + // Start using a TM method from this group. This constructs required meta + // data on demand when this method group is actually used. Will be called + // either on first use or after a previous call to fini(). + virtual void init() = 0; + // Stop using any method from this group for now. This can be used to + // destruct meta data as soon as this method group is not used anymore. + virtual void fini() = 0; +}; + + +// This is the base interface that all TM methods have to implement. +struct abi_dispatch +{ +public: + enum ls_modifier { NONTXNAL, R, RaR, RaW, RfW, W, WaR, WaW }; + +private: + // Disallow copies + abi_dispatch(const abi_dispatch &) = delete; + abi_dispatch& operator=(const abi_dispatch &) = delete; + +public: + // Starts or restarts a transaction. Is called right before executing the + // transactional application code (by either returning from + // gtm_thread::begin_transaction or doing the longjmp when restarting). + // Returns NO_RESTART if the transaction started successfully. 
Returns + // a real restart reason if it couldn't start and does need to abort. This + // allows TM methods to just give up and delegate ensuring progress to the + // restart mechanism. If it returns a restart reason, this call must be + // idempotent because it will trigger the restart mechanism, which could + // switch to a different TM method. + virtual gtm_restart_reason begin_or_restart() = 0; + // Tries to commit the transaction. Iff this returns true, the transaction + // got committed and all per-transaction data will have been reset. + // Currently, this is called only for the commit of the outermost + // transaction, or when switching to serial mode (which can happen in a + // nested transaction). + // If privatization safety must be ensured in a quiescence-based way, set + // priv_time to a value different to 0. Nontransactional code will not be + // executed after this commit until all registered threads' shared_state is + // larger than or equal to this value. + virtual bool trycommit(gtm_word& priv_time) = 0; + // Rolls back a transaction. Called on abort or after trycommit() returned + // false. + virtual void rollback(gtm_transaction_cp *cp = 0) = 0; + + // Return an alternative method that is compatible with the current + // method but supports closed nesting. Return zero if there is none. + // Note that too be compatible, it must be possible to switch to this other + // method on begin of a nested transaction without committing or restarting + // the parent method. + virtual abi_dispatch* closed_nesting_alternative() { return 0; } + + bool read_only () const { return m_read_only; } + bool write_through() const { return m_write_through; } + bool can_run_uninstrumented_code() const + { + return m_can_run_uninstrumented_code; + } + // Returns true iff this TM method supports closed nesting. + bool closed_nesting() const { return m_closed_nesting; } + method_group* get_method_group() const { return m_method_group; } + + static void *operator new(size_t s) { return xmalloc (s); } + static void operator delete(void *p) { free (p); } + +public: + static bool memmove_overlap_check(void *dst, const void *src, size_t size, + ls_modifier dst_mod, ls_modifier src_mod); + + // Creates the ABI dispatch methods for loads and stores. + // ??? Should the dispatch table instead be embedded in the dispatch object + // to avoid the indirect lookup in the vtable? + CREATE_DISPATCH_METHODS_PV(virtual, ) + // Creates the ABI dispatch methods for memcpy/memmove/memset. + CREATE_DISPATCH_METHODS_MEM_PV() + +protected: + const bool m_read_only; + const bool m_write_through; + const bool m_can_run_uninstrumented_code; + const bool m_closed_nesting; + method_group* const m_method_group; + abi_dispatch(bool ro, bool wt, bool uninstrumented, bool closed_nesting, + method_group* mg) : + m_read_only(ro), m_write_through(wt), + m_can_run_uninstrumented_code(uninstrumented), + m_closed_nesting(closed_nesting), m_method_group(mg) + { } +}; + +} + +#endif // DISPATCH_H Index: libitm/aatree.cc =================================================================== --- libitm/aatree.cc (.../trunk) (revision 0) +++ libitm/aatree.cc (.../branches/transactional-memory) (revision 180773) @@ -0,0 +1,222 @@ +/* Copyright (C) 2009 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). 
+ + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +// Implements an AA tree (http://en.wikipedia.org/wiki/AA_tree) with an +// integer key, and data attached to the node via flexible array member. + +#include "libitm_i.h" + +namespace GTM HIDDEN { + +// The code for rebalancing the tree is greatly simplified by never +// having to check for null pointers. Instead, leaf node links point +// to this node, NIL, which points to itself. +const aa_node_base aa_node_base::s_nil(0); + + +// Remove left horizontal links. Swap the pointers of horizontal left links. + +aa_node_base * +aa_node_base::skew () +{ + aa_node_base *l = this->link(L); + if (this->m_level != 0 && l->m_level == this->m_level) + { + this->set_link(L, l->link(R)); + l->set_link(R, this); + return l; + } + return this; +} + + +// Remove consecutive horizontal links. Take the middle node, +// elevate it, and return it. + +aa_node_base * +aa_node_base::split () +{ + aa_node_base *r = this->link(R); + if (this->m_level != 0 && r->link(R)->m_level == this->m_level) + { + this->set_link(R, r->link(L)); + r->set_link(L, this); + r->m_level += 1; + return r; + } + return this; +} + +// Decrease the level of THIS to be one more than the level of its children. + +void +aa_node_base::decrease_level () +{ + aa_node_base *l = this->link(L); + aa_node_base *r = this->link(R); + level_type llev = l->m_level; + level_type rlev = r->m_level; + level_type should_be = (llev < rlev ? llev : rlev) + 1; + + if (should_be < this->m_level) + { + this->m_level = should_be; + if (should_be < rlev) + r->m_level = should_be; + } +} + +// Find and return the node in the tree with key K. + +template +typename aa_tree_key::node_ptr +aa_tree_key::find(KEY k) const +{ + node_ptr t = m_tree; + if (t != 0) + do + { + if (t->key == k) + return t; + t = t->link(k > t->key); + } + while (!t->is_nil()); + return 0; +} + +// Insert N into T and rebalance. Return the new balanced tree. + +template +typename aa_tree_key::node_ptr +aa_tree_key::insert_1 (node_ptr t, node_ptr n) +{ + bool dir = n->key > t->key; + node_ptr c = t->link(dir); + + // Insert the node, recursively. + if (c->is_nil()) + c = n; + else + c = insert_1 (c, n); + t->set_link(dir, c); + + // Rebalance the tree, as needed. + t = t->skew(); + t = t->split(); + + return t; +} + +template +void +aa_tree_key::insert(node_ptr n) +{ + if (m_tree == 0) + m_tree = n; + else + m_tree = insert_1 (m_tree, n); +} + +// Delete K from T and rebalance. Return the new balanced tree. + +template +typename aa_tree_key::node_ptr +aa_tree_key::erase_1 (node_ptr t, KEY k, node_ptr *pfree) +{ + node_ptr r; + bool dir; + + // If this is the node we're looking for, delete it. Else recurse. 
+ if (k == t->key) + { + node_ptr l, sub, end; + + l = t->link(node::L); + r = t->link(node::R); + + if (pfree) + *pfree = t; + + // If this is a leaf node, simply remove the node. Otherwise, + // we have to find either a predecessor or a successor node to + // replace this one. + if (l->is_nil()) + { + if (r->is_nil()) + return r; + sub = r, dir = node::L; + } + else + sub = l, dir = node::R; + + // Find the successor or predecessor. + for (end = sub; !end->link(dir)->is_nil(); end = end->link(dir)) + continue; + + // Remove it (but don't free) from the subtree. + sub = erase_1 (sub, end->key, 0); + + // Replace T with the successor we just extracted. + end->set_link(!dir, sub); + t = end; + } + else + { + dir = k > t->key; + t->set_link(dir, erase_1 (t->link(dir), k, pfree)); + } + + // Rebalance the tree. + t->decrease_level(); + t = t->skew(); + r = t->link(node::R)->skew(); + t->set_link(node::R, r); + r->set_link(node::R, r->link(node::R)->skew()); + t = t->split (); + t->set_link(node::R, t->link(node::R)->split()); + + return t; +} + +template +typename aa_tree_key::node_ptr +aa_tree_key::erase (KEY k) +{ + node_ptr t = m_tree; + if (t == 0) + return 0; + + node_ptr do_free = 0; + t = erase_1 (t, k, &do_free); + if (t->is_nil()) + t = 0; + m_tree = t; + return do_free; +} + +// Instantiate key classes. + +template class aa_tree_key; + +} // namespace GTM Index: libitm/aatree.h =================================================================== --- libitm/aatree.h (.../trunk) (revision 0) +++ libitm/aatree.h (.../branches/transactional-memory) (revision 180773) @@ -0,0 +1,215 @@ +/* Copyright (C) 2009, 2011 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Implements an AA tree (http://en.wikipedia.org/wiki/AA_tree) with an + integer key, and data attached to the node via flexible array member. 
*/ + +#ifndef LIBITM_AATREE_H +#define LIBITM_AATREE_H 1 + +namespace GTM HIDDEN { + +template class aa_tree_key; + +class aa_node_base +{ + public: + static const bool L = false; + static const bool R = true; + + private: + typedef unsigned int level_type; + + aa_node_base *m_link[2]; + level_type m_level; + + static const aa_node_base s_nil; + + public: + aa_node_base(level_type l = 1) + : m_link { const_cast(&s_nil), + const_cast(&s_nil) }, + m_level(l) + { } + + bool is_nil() const { return this == &s_nil; } + + aa_node_base * link(bool d) { return m_link[d]; } + void set_link(bool d, aa_node_base *val) { m_link[d] = val; } + + aa_node_base *skew(); + aa_node_base *split(); + void decrease_level(); + + static void *operator new (size_t s) { return xmalloc (s); } + static void operator delete (void *p) { free (p); } +}; + +template +struct aa_node_key : public aa_node_base +{ + typedef aa_node_base base; + + KEY key; + + explicit aa_node_key(KEY k) : key(k) { } + + aa_node_key * link(bool d) + { + return static_cast(base::link(d)); + } + + aa_node_key *skew() { return static_cast(base::skew()); } + aa_node_key *split() { return static_cast(base::split()); } +}; + +template +struct aa_node : public aa_node_key +{ + typedef aa_node_key base; + + DATA data; + + explicit aa_node(KEY k) : base(k) { } + + aa_node * link(bool d) + { + return static_cast(base::link(d)); + } +}; + +template +class aa_tree_key +{ + public: + typedef aa_node_key node; + typedef node *node_ptr; + + protected: + node_ptr m_tree; + + protected: + aa_tree_key() : m_tree(0) { } + + node_ptr find(KEY k) const; + + static node_ptr insert_1 (node_ptr t, node_ptr n); + void insert(node_ptr n); + + static node_ptr erase_1 (node_ptr t, KEY k, node_ptr *pfree); + node_ptr erase(KEY k); +}; + +extern template class aa_tree_key; + +template +class aa_tree : public aa_tree_key +{ + public: + typedef aa_tree_key base; + typedef aa_node node; + typedef node *node_ptr; + + typedef void (*trav_callback)(KEY, DATA *, void *); + + private: + static void clear_1 (node_ptr); + static void traverse_1 (node_ptr, trav_callback, void *); + + public: + aa_tree() = default; + ~aa_tree() { clear(); } + + static void *operator new (size_t s, aa_tree* p) { return p; } + + DATA *find(KEY k) const + { + node_ptr n = static_cast(base::find (k)); + return n ? &n->data : 0; + } + + DATA *insert(KEY k) + { + node_ptr n = new node(k); + base::insert(n); + return &n->data; + } + + void erase(KEY k) + { + node_ptr n = static_cast(base::erase (k)); + delete n; + } + + node_ptr remove(KEY k, DATA** data) + { + node_ptr n = static_cast(base::erase (k)); + *data = (n ? 
&n->data : 0); + return n; + } + + void clear() + { + node_ptr n = static_cast(this->m_tree); + if (n) + { + this->m_tree = 0; + clear_1 (n); + } + } + + void traverse (trav_callback cb, void *cb_data) + { + node_ptr t = static_cast(this->m_tree); + if (t != 0) + traverse_1 (t, cb, cb_data); + } +}; + + +template +void +aa_tree::clear_1 (node_ptr t) +{ + if (t->is_nil()) + return; + clear_1 (t->link(node::L)); + clear_1 (t->link(node::R)); + delete t; +} + +template +void +aa_tree::traverse_1 (node_ptr t, trav_callback cb, void *cb_data) +{ + if (t->is_nil()) + return; + cb (t->key, &t->data, cb_data); + traverse_1 (t->link(node::L), cb, cb_data); + traverse_1 (t->link(node::R), cb, cb_data); +} + +} // namespace GTM + +#endif // LIBITM_AATREE_H Index: libitm/libitm.texi =================================================================== --- libitm/libitm.texi (.../trunk) (revision 0) +++ libitm/libitm.texi (.../branches/transactional-memory) (revision 180773) @@ -0,0 +1,778 @@ +\input texinfo @c -*-texinfo-*- + +@c %**start of header +@setfilename libitm.info +@settitle GNU libitm +@c %**end of header + + +@copying +Copyright @copyright{} 2011 Free Software Foundation, Inc. + +Permission is granted to copy, distribute and/or modify this document +under the terms of the GNU Free Documentation License, Version 1.2 or +any later version published by the Free Software Foundation; with the +Invariant Sections being ``Funding Free Software'', the Front-Cover +texts being (a) (see below), and with the Back-Cover Texts being (b) +(see below). A copy of the license is included in the section entitled +``GNU Free Documentation License''. + +(a) The FSF's Front-Cover Text is: + + A GNU Manual + +(b) The FSF's Back-Cover Text is: + + You have freedom to copy and modify this GNU Manual, like GNU + software. Copies published by the Free Software Foundation raise + funds for GNU development. +@end copying + +@ifinfo +@dircategory GNU Libraries +@direntry +* libitm: (libitm). GNU Transactional Memory Library +@end direntry + +This manual documents the GNU Transactional Memory Library. + +Published by the Free Software Foundation +51 Franklin Street, Fifth Floor +Boston, MA 02110-1301 USA + +@insertcopying +@end ifinfo + + +@setchapternewpage odd + +@titlepage +@title The GNU Transactional Memory Library +@page +@vskip 0pt plus 1filll +@comment For the @value{version-GCC} Version* +@sp 1 +Published by the Free Software Foundation @* +51 Franklin Street, Fifth Floor@* +Boston, MA 02110-1301, USA@* +@sp 1 +@insertcopying +@end titlepage + +@summarycontents +@contents +@page + + +@node Top +@top Introduction +@cindex Introduction + +This manual documents the usage and internals of libitm, the GNU Transactional +Memory Library. It provides transaction support for accesses to a process' +memory, enabling easy-to-use synchronization of accesses to shared memory by +several threads. + + +@comment +@comment When you add a new menu item, please keep the right hand +@comment aligned to the same column. Do not use tabs. This provides +@comment better formatting. +@comment +@menu +* Enabling libitm:: How to enable libitm for your applications. +* C/C++ Language Constructs for TM:: + Notes on the language-level interface supported + by gcc. +* The libitm ABI:: Notes on the external ABI provided by libitm. +* Internals:: Notes on libitm's internal synchronization. +* Copying:: GNU general public license says + how you can copy and share libgomp. 
+* GNU Free Documentation License:: + How you can copy and share this manual. +* Funding:: How to help assure continued work for free + software. +* Index:: Index of this documentation. +@end menu + + +@c --------------------------------------------------------------------- +@c Enabling libitm +@c --------------------------------------------------------------------- + +@node Enabling libitm +@chapter Enabling libitm + +To activate support for TM in C/C++, the compile-time flag @option{-fgnu-tm} +must be specified. This enables TM language-level constructs such as +transaction statements (@code{__transaction}, @pxref{C/C++ Language +Constructs for TM} for details). + +@c --------------------------------------------------------------------- +@c C/C++ Language Constructs for TM +@c --------------------------------------------------------------------- + +@node C/C++ Language Constructs for TM +@chapter C/C++ Language Constructs for TM + +TODO: link to the C++ TM spec. a few examples. how gcc's support differs. + +@c --------------------------------------------------------------------- +@c The libitm ABI +@c --------------------------------------------------------------------- + +@node The libitm ABI +@chapter The libitm ABI + +The ABI provided by libitm is basically equal to the Linux variant of Intel's +current TM ABI specification document (Revision 1.1, May 6 2009) but with the +differences listed in this chapter. It would be good if these changes would +eventually be merged into a future version of this specification. To ease +look-up, the following subsections mirror the structure of this specification. + +@section [No changes] Objectives +@section [No changes] Non-objectives + +@section Library design principles +@subsection [No changes] Calling conventions +@subsection [No changes] TM library algorithms +@subsection [No changes] Optimized load and store routines +@subsection [No changes] Aligned load and store routines + +@subsection Data logging functions + +The memory locations accessed with transactional loads and stores and the +memory locations whose values are logged must not overlap. This required +separation only extends to the scope of the execution of one transaction +including all the executions of all nested transactions. + +The compiler must be consistent (within the scope of a single transaction) +about which memory locations are shared and which are not shared with other +threads (i.e., data must be accessed either transactionally or +nontransactionally). Otherwise, non-write-through TM algorithms would not work. + +@subsection [No changes] Scatter/gather calls +@subsection [No changes] Serial and irrevocable mode +@subsection [No changes] Transaction descriptor +@subsection Store allocation + +There is no @code{getTransaction} function. + +@subsection [No changes] Naming conventions + +@subsection Function pointer encryption + +Currently, this is not implemented. + + +@section Types and macros list + +@code{_ITM_codeProperties} has changed, @pxref{txn-code-properties,,Starting a +transaction}. +@code{_ITM_srcLocation} is not used. + + +@section Function list + +@subsection Initialization and finalization functions +These functions are not part of the ABI. + +@subsection [No changes] Version checking +@subsection [No changes] Error reporting +@subsection [No changes] inTransaction call + +@subsection State manipulation functions +There is no @code{getTransaction} function. 
Transaction identifiers for +nested transactions will be ordered but not necessarily sequential (i.e., for +a nested transaction's identifier @var{IN} and its enclosing transaction's +identifier @var{IE}, it is guaranteed that @math{IN >= IE}). + +@subsection [No changes] Source locations + +@subsection Starting a transaction + +@subsubsection Transaction code properties + +@anchor{txn-code-properties} +The bit @code{hasNoXMMUpdate} is instead called @code{hasNoVectorUpdate}. +Iff it is set, vector register save/restore is not necessary for any target +machine. + +The @code{hasNoFloatUpdate} bit (@code{0x0010}) is new. Iff it is set, floating +point register save/restore is not necessary for any target machine. + +@code{undoLogCode} is not supported and a fatal runtime error will be raised +if this bit is set. It is not properly defined in the ABI why barriers +other than undo logging are not present; Are they not necessary (e.g., a +transaction operating purely on thread-local data) or have they been omitted by +the compiler because it thinks that some kind of global synchronization +(e.g., serial mode) might perform better? The specification suggests that the +latter might be the case, but the former seems to be more useful. + +The @code{readOnly} bit (@code{0x4000}) is new. @strong{TODO} Lexical or dynamic +scope? + +@code{hasNoRetry} is not supported. If this bit is not set, but +@code{hasNoAbort} is set, the library can assume that transaction +rollback will not be requested. + +It would be useful if the absence of externally-triggered rollbacks would be +reported for the dynamic scope as well, not just for the lexical scope +(@code{hasNoAbort}). Without this, a library cannot exploit this together +with flat nesting. + +@code{exceptionBlock} is not supported because exception blocks are not used. + +@subsubsection [No changes] Windows exception state +@subsubsection [No changes] Other machine state + +@subsubsection [No changes] Results from beginTransaction + +@subsection Aborting a transaction + +@code{_ITM_rollbackTransaction} is not supported. @code{_ITM_abortTransaction} +is supported but the abort reasons @code{exceptionBlockAbort}, +@code{TMConflict}, and @code{userRetry} are not supported. There are no +exception blocks in general, so the related cases also do not have to be +considered. To encode @code{__transaction_cancel [[outer]]}, compilers must +set the new @code{outerAbort} bit (@code{0x10}) additionally to the +@code{userAbort} bit in the abort reason. + +@subsection Committing a transaction + +The exception handling (EH) scheme is different. The Intel ABI requires the +@code{_ITM_tryCommitTransaction} function that will return even when the +commit failed and will have to be matched with calls to either +@code{_ITM_abortTransaction} or @code{_ITM_commitTransaction}. In contrast, +gcc relies on transactional wrappers for the functions of the Exception +Handling ABI and on one additional commit function (shown below). This allows +the TM to keep track of EH internally and thus it does not have to embed the +cleanup of EH state into the existing EH code in the program. +@code{_ITM_tryCommitTransaction} is not supported. +@code{_ITM_commitTransactionToId} is also not supported because the +propagation of thrown exceptions will not bypass commits of nested +transactions. 
+
+@example
+void _ITM_commitTransactionEH(void *exc_ptr) ITM_REGPARM;
+void *_ITM_cxa_allocate_exception (size_t);
+void _ITM_cxa_throw (void *obj, void *tinfo, void *dest);
+void *_ITM_cxa_begin_catch (void *exc_ptr);
+void _ITM_cxa_end_catch (void);
+@end example
+
+@code{_ITM_commitTransactionEH} must be called to commit a transaction if an
+exception could be in flight at this position in the code. @code{exc_ptr} is
+the current exception or zero if there is no current exception.
+The @code{_ITM_cxa...} functions are transactional wrappers for the respective
+@code{__cxa...} functions and must be called instead of these in transactional
+code.
+
+To support this EH scheme, libstdc++ needs to provide one additional function
+(@code{__cxa_tm_cleanup}), which is used by the TM to clean up the exception
+handling state while rolling back a transaction:
+
+@example
+void __cxa_tm_cleanup (void *unthrown_obj, void *cleanup_exc,
+                       unsigned int caught_count);
+@end example
+
+@code{unthrown_obj} is non-null if the program called
+@code{__cxa_allocate_exception} for this exception but has not yet called
+@code{__cxa_throw} for it. @code{cleanup_exc} is non-null if the program is
+currently processing a cleanup along an exception path but has not caught this
+exception yet. @code{caught_count} is the nesting depth of
+@code{__cxa_begin_catch} within the transaction (which can be counted by the TM
+using @code{_ITM_cxa_begin_catch} and @code{_ITM_cxa_end_catch});
+@code{__cxa_tm_cleanup} then performs rollback by essentially performing
+@code{__cxa_end_catch} that many times.
+
+
+
+@subsection Exception handling support
+
+Currently, there is no support for functionality like
+@code{__transaction_cancel throw} as described in the C++ TM specification.
+Supporting this should be possible with the EH scheme explained previously
+because, via the transactional wrappers for the EH ABI, the TM is able to
+observe and intercept EH.
+
+
+@subsection [No changes] Transition to serial--irrevocable mode
+@subsection [No changes] Data transfer functions
+@subsection [No changes] Transactional memory copies
+
+@subsection Transactional versions of memmove
+
+If either the source or destination memory region is to be accessed
+nontransactionally, then the source and destination regions must not
+overlap. The respective @code{_ITM_memmove} functions are still
+available but a fatal runtime error will be raised if such regions do overlap.
+To support this functionality, the ABI would have to specify how the
+intersection of the regions has to be accessed (i.e., transactionally or
+nontransactionally).
+
+@subsection [No changes] Transactional versions of memset
+@subsection [No changes] Logging functions
+
+@subsection User-registered commit and undo actions
+
+Commit actions will get executed in the same order in which the respective
+calls to @code{_ITM_addUserCommitAction} happened. Only
+@code{_ITM_noTransactionId} is allowed as the value for the
+@code{resumingTransactionId} argument. Commit actions get executed after
+privatization safety has been ensured.
+
+Undo actions will get executed in the reverse of the order in which the
+respective calls to @code{_ITM_addUserUndoAction} happened. The ordering of
+undo actions w.r.t. the roll-back of other actions (e.g., data transfers or
+memory allocations) is undefined.
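[Editorial sketch, not part of the patch.] The C fragment below illustrates the ordering guarantees just described. The prototypes are repeated here purely for illustration and are assumed to match libitm.h and the Intel TM ABI (the real declarations additionally carry ITM_REGPARM); the value chosen for _ITM_noTransactionId is likewise an assumption. The fragment must be compiled with -fgnu-tm, and the __transaction_relaxed construct is used so that the runtime calls are permitted inside the transaction.

/* Assumed to match the declarations in libitm.h; for illustration only.  */
typedef unsigned long long _ITM_transactionId_t;   /* assumed 64-bit id type */
extern void _ITM_addUserCommitAction (void (*) (void *),
                                      _ITM_transactionId_t, void *);
extern void _ITM_addUserUndoAction (void (*) (void *), void *);
#define _ITM_noTransactionId 1                      /* assumed "no id" value */

static int commit_log, undo_log;

static void on_commit (void *arg) { commit_log = commit_log * 10 + *(int *) arg; }
static void on_undo (void *arg)   { undo_log = undo_log * 10 + *(int *) arg; }

static int one = 1, two = 2;

void register_actions (void)
{
  __transaction_relaxed {
    /* Commit actions run in registration order after the outermost commit,
       once privatization safety has been ensured: commit_log becomes 12.  */
    _ITM_addUserCommitAction (on_commit, _ITM_noTransactionId, &one);
    _ITM_addUserCommitAction (on_commit, _ITM_noTransactionId, &two);

    /* Undo actions run in reverse registration order on rollback, so
       undo_log would become 21.  */
    _ITM_addUserUndoAction (on_undo, &one);
    _ITM_addUserUndoAction (on_undo, &two);
  }
}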
+
+@code{_ITM_getThreadnum} is not supported currently because its only purpose
+is to provide a thread ID that matches some assumed performance tuning output,
+but this output is not part of the ABI nor further defined by it.
+
+@code{_ITM_dropReferences} is not supported currently because its semantics and
+the intention behind it are not entirely clear. The
+specification suggests that this function is necessary because of certain
+orderings of data transfer undos and the releasing of memory regions (i.e.,
+privatization). However, this ordering is never defined, nor is the ordering of
+dropping references w.r.t. other events.
+
+@subsection [New] Transactional indirect calls
+
+Indirect calls (i.e., calls through a function pointer) within transactions
+should execute the transactional clone of the original function (i.e., a clone
+of the original that has been fully instrumented to use the TM runtime), if
+such a clone is available. The runtime provides two functions to
+register/deregister clone tables:
+
+@example
+struct clone_entry
+@{
+  void *orig, *clone;
+@};
+
+void _ITM_registerTMCloneTable (clone_entry *table, size_t entries);
+void _ITM_deregisterTMCloneTable (clone_entry *table);
+@end example
+
+Registered tables must be writable by the TM runtime, and must remain live
+throughout the lifetime of the TM runtime.
+
+@strong{TODO} The intention was always to drop the registration functions
+entirely, and create a new ELF Phdr describing the linker-sorted table, much
+like what currently happens for @code{PT_GNU_EH_FRAME}.
+This work kept getting bogged down in how to represent the @var{N} different
+code generation variants. We clearly needed at least two---SW and HW
+transactional clones---but there was always a suggestion of more variants for
+different TM assumptions/invariants.
+
+The compiler can then use two TM runtime functions to perform indirect calls in
+transactions:
+@example
+void *_ITM_getTMCloneOrIrrevocable (void *function) ITM_REGPARM;
+void *_ITM_getTMCloneSafe (void *function) ITM_REGPARM;
+@end example
+
+If there is a registered clone for the supplied function, both will return a
+pointer to the clone. If not, the first runtime function will attempt to switch
+to serial--irrevocable mode and return the original pointer, whereas the second
+will raise a fatal runtime error.
+
+@subsection [New] Transactional dynamic memory management
+
+@example
+void *_ITM_malloc (size_t)
+       __attribute__((__malloc__)) ITM_PURE;
+void *_ITM_calloc (size_t, size_t)
+       __attribute__((__malloc__)) ITM_PURE;
+void _ITM_free (void *) ITM_PURE;
+@end example
+
+These functions are essentially transactional wrappers for @code{malloc},
+@code{calloc}, and @code{free}. Within transactions, the compiler should
+replace calls to the original functions with calls to the wrapper functions.
+
+
+@section [No changes] Future Enhancements to the ABI
+
+@section Sample code
+
+The code examples might not be correct w.r.t. the current version of the ABI,
+especially everything related to exception handling.
+
+
+@section [New] Memory model
+
+The ABI should define a memory model and the ordering that is guaranteed for
+data transfers and commit/undo actions, or at least refer to another memory
+model that needs to be preserved. Without that, the compiler cannot ensure the
+memory model specified on the level of the programming language (e.g., by the
+C++ TM specification).
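+
+To illustrate what is at stake, consider the usual publication idiom (a
+minimal sketch with invented variable and function names, not text from the
+ABI):
+
+@example
+int data;
+int published = 0;
+
+void thread1 (void)   /* publisher */
+@{
+  data = 42;                                 /* nontransactional init */
+  __transaction_atomic @{ published = 1; @}  /* publish */
+@}
+
+void thread2 (void)   /* reader */
+@{
+  int val = 0;
+  /* If the transactional load of 'published' returns 1, the subsequent
+     load of 'data' must return 42.  */
+  __transaction_atomic @{ if (published) val = data; @}
+@}
+@end example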
+
+For example, if a transactional load is ordered before another load/store, then
+the TM runtime must also ensure this ordering when accessing shared state. If
+not, this might break the kind of publication safety used in the C++ TM
+specification. Likewise, the TM runtime must ensure privatization safety.
+
+
+
+@c ---------------------------------------------------------------------
+@c Internals
+@c ---------------------------------------------------------------------
+
+@node Internals
+@chapter Internals
+
+@section TM methods and method groups
+
+libitm supports several ways of synchronizing transactions with each other.
+These TM methods (or TM algorithms) are implemented in the form of
+subclasses of @code{abi_dispatch}, which provide methods for
+transactional loads and stores as well as callbacks for rollback and commit.
+All methods that are compatible with each other (i.e., that let concurrently
+running transactions still synchronize correctly even if different methods
+are used) belong to the same TM method group. Pointers to TM methods can be
+obtained using the factory methods prefixed with @code{dispatch_} in
+@file{libitm_i.h}. There are two special methods, @code{dispatch_serial} and
+@code{dispatch_serialirr}, that are compatible with all methods because they
+run transactions completely in serial mode.
+
+@subsection TM method life cycle
+
+The state of TM methods does not change after construction, but they do alter
+the state of transactions that use them. However, because
+per-transaction data gets used by several methods, @code{gtm_thread} is
+responsible for setting an initial state that is useful for all methods.
+After that, methods are responsible for resetting/clearing this state on each
+rollback or commit (of outermost transactions), so that the transaction
+executed next is not affected by the previous transaction.
+
+There is also global state associated with each method group, which is
+initialized and shut down (@code{method_group::init()} and @code{fini()})
+when switching between method groups (see @file{retry.cc}).
+
+@subsection Selecting the default method
+
+The default method that libitm uses for freshly started transactions (but
+not necessarily for restarted transactions) can be set via an environment
+variable (@env{ITM_DEFAULT_METHOD}), whose value should be equal to the name
+of one of the factory methods returning @code{abi_dispatch} subclasses but
+without the "dispatch_" prefix (e.g., "serialirr" instead of
+@code{GTM::dispatch_serialirr()}).
+
+Note that this environment variable is only a hint for libitm and might not
+be supported in the future.
+
+
+@section Nesting: flat vs. closed
+
+We support two different kinds of nesting of transactions. In the case of
+@emph{flat nesting}, the nesting structure is flattened and all nested
+transactions are subsumed by the enclosing transaction. In contrast,
+with @emph{closed nesting}, nested transactions that have not yet committed
+can be rolled back separately from the enclosing transactions; when they
+commit, they are subsumed by the enclosing transaction, and their effects
+will finally be committed when the outermost transaction commits.
+@emph{Open nesting} (where nested transactions can commit independently of the
+enclosing transactions) is not supported.
+
+Flat nesting is the default nesting mode, but closed nesting is supported and
+used when transactions contain user-controlled aborts
+(@code{__transaction_cancel} statements).
We assume that user-controlled
+aborts are rare in typical code and used mostly in exceptional situations.
+Thus, it makes more sense to use flat nesting by default to avoid the
+performance overhead of the additional checkpoints required for closed
+nesting. User-controlled aborts will correctly abort the innermost enclosing
+transaction, whereas the whole (i.e., outermost) transaction will be restarted
+otherwise (e.g., when a transaction encounters data conflicts during
+optimistic execution).
+
+
+@section Locking conventions
+
+This section documents the locking scheme and rules for all uses of locking
+in libitm. We have to support serial(-irrevocable) mode, which is implemented
+using a global lock as explained next (called the @emph{serial lock}). To
+simplify the overall design, we use the same lock as a catch-all locking
+mechanism for other infrequent tasks such as (de)registering clone tables or
+threads. Besides the serial lock, there are @emph{per-method-group locks} that
+are managed by specific method groups (i.e., groups of similar TM concurrency
+control algorithms), and lock-like constructs for quiescence-based operations
+such as ensuring privatization safety.
+
+Thus, the actors that participate in the libitm-internal locking are
+@emph{active transactions} that do not run in serial mode, @emph{serial
+transactions} (which run, or are about to run, in serial mode), and management
+tasks that do not execute within a transaction but have acquired serial mode
+like a serial transaction would (e.g., to be able to register threads with
+libitm). Transactions become active as soon as they have successfully used the
+serial lock to announce this globally (@pxref{serial-lock-impl,,Serial lock
+implementation}). Likewise, transactions become serial transactions as soon as
+they have acquired the exclusive rights provided by the serial lock (i.e.,
+serial mode, which also means that there are no other concurrent active or
+serial transactions). Note that active transactions can become serial
+transactions when they enter serial mode during the runtime of the
+transaction.
+
+@subsection State-to-lock mapping
+
+Application data is protected by the serial lock if there is a serial
+transaction and no concurrently running active transaction (i.e., non-serial).
+Otherwise, application data is protected by the currently selected method
+group, which might use per-method-group locks or other mechanisms. Also note
+that application data that is about to be privatized might not be allowed to be
+accessed by nontransactional code until privatization safety has been ensured;
+the details of this are handled by the current method group.
+
+libitm-internal state is either protected by the serial lock or accessed
+through custom concurrent code. The latter applies to the public/shared part
+of a transaction object and most typical method-group-specific state.
+
+The former category (protected by the serial lock) includes:
+@itemize @bullet
+@item The list of active threads that have used transactions.
+@item The tables that map functions to their transactional clones.
+@item The current selection of which method group to use.
+@item Some method-group-specific data, or invariants of this data. For example,
+resetting a method group to its initial state is handled by switching to the
+same method group, so the serial lock protects such resetting as well.
+@end itemize
+In general, such state is immutable whenever there exists an active
+(non-serial) transaction.
If there is no active transaction, a serial
+transaction (or a thread that is not currently executing a transaction but has
+acquired the serial lock) is allowed to modify this state (but must of course
+be careful to not surprise the current method group's implementation with such
+modifications).
+
+@subsection Lock acquisition order
+
+To prevent deadlocks, lock acquisition must happen in a globally agreed-upon
+order. Note that this applies to other forms of blocking too, but does not
+necessarily apply to lock acquisitions that do not block (e.g., trylock()
+calls that do not get retried forever). Note that serial transactions never
+return to being active transactions until the transaction has committed.
+Likewise, active transactions stay active until they have committed.
+Per-method-group locks are typically also not released before commit.
+
+Lock acquisition / blocking rules:
+@itemize @bullet
+
+@item Transactions must become active or serial before they are allowed to
+use method-group-specific locks or blocking (i.e., the serial lock must be
+acquired before those other locks, either in serial or nonserial mode).
+
+@item Any number of threads that do not currently run active transactions can
+block while trying to get the serial lock in exclusive mode. Note that active
+transactions must not block when trying to upgrade to serial mode unless there
+is no other transaction that is trying that (the latter is ensured by the
+serial lock implementation).
+
+@item Method groups must prevent deadlocks on their locks. In particular, they
+must also be prepared for another active transaction that has acquired
+method-group-specific locks but is blocked during an attempt to upgrade to
+being a serial transaction. See below for details.
+
+@item Serial transactions can acquire method-group-specific locks because there
+will be no other active or serial transaction.
+
+@end itemize
+
+There is no single rule for per-method-group blocking because this depends on
+when a TM method might acquire locks. If no active transaction can upgrade to
+being a serial transaction after it has acquired per-method-group locks (e.g.,
+when those locks are only acquired during an attempt to commit), then the TM
+method does not need to consider a potential deadlock due to serial mode.
+
+If there can be upgrades to serial mode after the acquisition of
+per-method-group locks, then TM methods need to avoid those deadlocks:
+@itemize @bullet
+@item When upgrading to a serial transaction, after acquiring exclusive rights
+to the serial lock but before waiting for concurrent active transactions to
+finish (@pxref{serial-lock-impl,,Serial lock implementation} for details),
+we have to wake up all active transactions waiting on the upgrader's
+per-method-group locks.
+@item Active transactions blocking on per-method-group locks need to check the
+serial lock and abort if there is a pending serial transaction.
+@item Lost wake-ups have to be prevented (e.g., by changing a bit in each
+per-method-group lock before doing the wake-up, and only blocking on this lock
+using a futex if this bit is not set); see the sketch below.
+@end itemize
+
+@strong{TODO}: Can we reuse the serial lock for gl-*? And if we can, does it
+make sense to introduce further complexity in the serial lock? For gl-*, we
+can really only avoid an abort if we do -wb and -vbv.
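+
+The following is a minimal sketch (invented names, not the code in this patch)
+of how the last two rules can be implemented for a per-method-group lock whose
+lock word carries a @emph{serial-pending} bit. @code{futex_wait},
+@code{futex_wake}, and @code{abort_and_restart} are assumed helpers standing
+for the usual futex wrappers and for the method group's abort path:
+
+@example
+/* Assumed helpers, not part of this patch.  */
+void futex_wait (unsigned int *addr, unsigned int expected);
+void futex_wake (unsigned int *addr);
+void abort_and_restart (void);   /* does not return */
+
+#define MG_LOCKED          1
+#define MG_SERIAL_PENDING  2
+
+void mg_lock_or_abort (unsigned int *lock_word)
+@{
+  for (;;)
+    @{
+      unsigned int w = *lock_word;
+      if (w & MG_SERIAL_PENDING)
+        abort_and_restart ();    /* a serial upgrade is pending: do not block */
+      if (!(w & MG_LOCKED)
+          && __sync_bool_compare_and_swap (lock_word, w, w | MG_LOCKED))
+        return;                  /* lock acquired */
+      /* The upgrader sets MG_SERIAL_PENDING before calling futex_wake, so
+         re-checking the bit right before blocking prevents lost wake-ups.  */
+      if (!(*lock_word & MG_SERIAL_PENDING))
+        futex_wait (lock_word, w);
+    @}
+@}
+
+/* Called by the upgrading transaction after it has acquired exclusive rights
+   to the serial lock, but before it waits for active transactions to finish. */
+void mg_announce_serial_upgrade (unsigned int *lock_word)
+@{
+  __sync_fetch_and_or (lock_word, MG_SERIAL_PENDING);
+  futex_wake (lock_word);        /* wake all waiters so they can abort */
+@}
+@end example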
+
+
+@subsection Serial lock implementation
+@anchor{serial-lock-impl}
+
+The serial lock implementation is optimized for the assumption that serial
+transactions are infrequent and not the common case. However, the performance
+of entering serial mode can matter because when only a few transactions run
+concurrently or there are only a few threads, it can be efficient to run
+transactions serially.
+
+The serial lock is similar to a multi-reader-single-writer lock in that there
+can be several active transactions but only one serial transaction. However,
+we do want to avoid contention (in the lock implementation) between active
+transactions, so we split up the reader side of the lock into per-transaction
+flags that are true iff the transaction is active. The exclusive writer side
+remains a shared single flag, which is acquired using a CAS, for example.
+On the fast path, the serial lock then works similarly to Dekker's algorithm,
+but with several reader flags that a serial transaction would have to check.
+A serial transaction thus requires a list of all threads with potentially
+active transactions; we can use the serial lock itself to protect this list
+(i.e., only threads that have acquired the serial lock can modify this list).
+
+We want starvation-freedom for the serial lock to allow for using it to ensure
+progress for potentially starved transactions (@pxref{progress-guarantees,,
+Progress Guarantees} for details). However, this is currently not enforced by
+the implementation of the serial lock.
+
+Here is pseudo-code for the read/write fast paths of acquiring the serial
+lock (read-to-write upgrade is similar to write_lock):
+@example
+// read_lock:
+tx->shared_state |= active;
+__sync_synchronize(); // or STLD membar, or C++0x seq-cst fence
+while (serial_lock.exclusive)
+  if (spinning_for_too_long) goto slowpath;
+
+// write_lock:
+if (CAS(&serial_lock.exclusive, 0, this) != 0)
+  goto slowpath; // writer-writer contention
+// need a membar here, but CAS already has full membar semantics
+bool need_blocking = false;
+for (t: all txns)
+  @{
+    for (;t->shared_state & active;)
+      if (spinning_for_too_long) @{ need_blocking = true; break; @}
+  @}
+if (need_blocking) goto slowpath;
+@end example
+
+Releasing a lock in this spin-lock version then just consists of resetting
+@code{tx->shared_state} to inactive or clearing @code{serial_lock.exclusive}.
+
+However, we can't rely on a pure spinlock because we need to get the OS
+involved at some point (e.g., when there are more threads than CPUs to run on).
+Therefore, the real implementation falls back to a blocking slow path, either
+based on pthread mutexes or Linux futexes.
+
+
+@subsection Reentrancy
+
+libitm has to consider the following cases of reentrancy:
+@itemize @bullet
+
+@item A transaction calls unsafe code that starts a new transaction: The outer
+transaction will become a serial transaction before executing unsafe code.
+Therefore, nesting within serial transactions must work, even if the nested
+transaction is called from within uninstrumented code.
+
+@item A transaction calls either a transactional wrapper or safe code, which in
+turn starts a new transaction: It is not yet defined in the specification
+whether this is allowed. Thus, it is undefined whether libitm supports this.
+
+@item Code that starts new transactions might be called from within any part
+of libitm: This kind of reentrancy would likely be rather complex and can
+probably be avoided. Therefore, it is not supported.
+
+@end itemize
+
+@subsection Privatization safety
+
+Privatization safety is ensured by libitm using a quiescence-based approach.
+Basically, a privatizing transaction waits until all concurrent active
+transactions either have finished (are not active anymore) or operate on
+a sufficiently recent snapshot so that they will not access the privatized
+data anymore. This happens after the privatizing transaction has stopped
+being an active transaction, so waiting for quiescence does not contribute
+to deadlocks.
+
+In method groups that need to ensure privatization safety explicitly, active
+transactions maintain a flag or timestamp in the public/shared part of the
+transaction descriptor. Before blocking, privatizers need to let the other
+transactions know that they should wake up the privatizer.
+
+@strong{TODO} How to implement the waiters? Should those flags be
+per-transaction or at a central place? We want to avoid one wake/wait call
+per active transaction, so we might want to use either a tree or combining
+to reduce the syscall overhead, or rather spin for a long time
+instead of blocking. Also, it would be good if only the last transaction
+that the privatizer waits for did the wake-up.
+
+@subsection Progress guarantees
+@anchor{progress-guarantees}
+
+Transactions that do not make progress when using the current TM method will
+eventually try to execute in serial mode. Thus, the serial lock's progress
+guarantees determine the progress guarantees of the whole TM. Obviously, we at
+least need deadlock-freedom for the serial lock, but it would also be good to
+provide starvation-freedom (informally, all threads will finish executing a
+transaction eventually iff they get enough cycles).
+
+However, the scheduling of transactions (e.g., thread scheduling by the OS)
+also affects the handling of progress guarantees by the TM. First, the TM
+can only guarantee deadlock-freedom if threads do not get stopped. Likewise,
+low-priority threads can starve if they do not get scheduled when other
+high-priority threads get those cycles instead.
+
+If all threads get scheduled eventually, correct lock implementations will
+provide deadlock-freedom, but might not provide starvation-freedom. We can
+either enforce the latter in the TM's lock implementation, or assume that
+the scheduling is sufficiently random to yield a probabilistic guarantee that
+no thread will starve (because eventually, a transaction will encounter a
+scheduling that will allow it to run). This can indeed work well in practice
+but is not necessarily guaranteed to work (e.g., simple spin locks can be
+pretty efficient).
+
+Because enforcing stronger progress guarantees in the TM has a higher runtime
+overhead, we focus on deadlock-freedom right now and assume that the threads
+will get scheduled eventually by the OS (but don't consider threads with
+different priorities). We should support starvation-freedom for serial
+transactions in the future. Everything beyond that is highly related to proper
+contention management across all of the TM (including which TM method to
+choose), and is future work.
+
+@strong{TODO} Handling thread priorities: We want to avoid priority inversion
+but it's unclear how often that actually matters in practice. Workloads that
+have threads with different priorities will likely also require lower latency
+or higher throughput for high-priority threads.
Therefore, it probably does
+not make much sense (except for eventual progress guarantees) to use
+priority inheritance until the TM has priority-aware contention management.
+
+
+@c ---------------------------------------------------------------------
+@c GNU General Public License
+@c ---------------------------------------------------------------------
+
+@include gpl.texi
+
+
+
+@c ---------------------------------------------------------------------
+@c GNU Free Documentation License
+@c ---------------------------------------------------------------------
+
+@include fdl.texi
+
+
+
+@c ---------------------------------------------------------------------
+@c Funding Free Software
+@c ---------------------------------------------------------------------
+
+@include funding.texi
+
+@c ---------------------------------------------------------------------
+@c Index
+@c ---------------------------------------------------------------------
+
+@node Index
+@unnumbered Index
+
+@printindex cp
+
+@bye
Index: libitm/containers.h
===================================================================
--- libitm/containers.h (.../trunk) (revision 0)
+++ libitm/containers.h (.../branches/transactional-memory) (revision 180773)
@@ -0,0 +1,108 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Torvald Riegel .
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef LIBITM_CONTAINERS_H
+#define LIBITM_CONTAINERS_H 1
+
+#include "common.h"
+
+namespace GTM HIDDEN {
+
+// A simple vector-like container.
+// If alloc_separate_cl is true, allocations will happen on separate cache
+// lines.
+template <typename T, bool alloc_separate_cl = true>
+class vector
+{
+ private:
+  size_t m_capacity;
+  size_t m_size;
+  T* entries;
+
+  // Initial capacity of the vector.
+  static const size_t default_initial_capacity = 32;
+  // Once the capacity reaches this value, grow by this amount per resize
+  // call instead of doubling.
+  static const size_t default_resize_max = 2048;
+  // Resize vector to at least this capacity.
+  static const size_t default_resize_min = 32;
+
+  // Don't try to copy this vector.
+  vector(const vector& x);
+
+ public:
+  typedef T datatype;
+  typedef T* iterator;
+
+  iterator begin() const { return entries; }
+  iterator end() const { return entries + m_size; }
+  T& operator[] (size_t pos) { return entries[pos]; }
+  const T& operator[] (size_t pos) const { return entries[pos]; }
+
+  vector(size_t initial_size = default_initial_capacity)
+    : m_capacity(initial_size),
+      m_size(0)
+  {
+    if (m_capacity > 0)
+      entries = (T*) xmalloc(sizeof(T) * m_capacity, alloc_separate_cl);
+    else
+      entries = 0;
+  }
+  ~vector() { if (m_capacity) free(entries); }
+
+  void resize()
+  {
+    if (m_capacity >= default_resize_max)
+      m_capacity = m_capacity + default_resize_max;
+    else
+      m_capacity = m_capacity * 2;
+    if (m_capacity < default_resize_min)
+      m_capacity = default_resize_min;
+    entries = (T*) xrealloc(entries, sizeof(T) * m_capacity, alloc_separate_cl);
+  }
+  void resize_noinline() __attribute__((noinline)) { resize(); }
+
+  size_t size() const { return m_size; }
+  size_t capacity() const { return m_capacity; }
+
+  void clear() { m_size = 0; }
+
+  iterator push() {
+    // Keep the rare resize path out of line because push() is often on the
+    // fast path.
+    if (unlikely(m_size == m_capacity)) resize_noinline();
+    return &entries[m_size++];
+  }
+
+  iterator pop() {
+    if (likely(m_size > 0))
+      {
+        m_size--;
+        return entries + m_size;
+      }
+    else return 0;
+  }
+};
+
+} // namespace GTM
+
+#endif // LIBITM_CONTAINERS_H
Index: libitm/libitm.map
===================================================================
--- libitm/libitm.map (.../trunk) (revision 0)
+++ libitm/libitm.map (.../branches/transactional-memory) (revision 180773)
@@ -0,0 +1,185 @@
+LIBITM_1.0 {
+  global:
+    _ITM_abortTransaction;
+    _ITM_addUserCommitAction;
+    _ITM_addUserUndoAction;
+    _ITM_beginTransaction;
+    _ITM_changeTransactionMode;
+    _ITM_commitTransaction;
+    _ITM_commitTransactionEH;
+    _ITM_error;
+    _ITM_getThreadnum;
+    _ITM_getTransactionId;
+    _ITM_inTransaction;
+    _ITM_libraryVersion;
+    _ITM_versionCompatible;
+
+    _ITM_registerTMCloneTable;
+    _ITM_deregisterTMCloneTable;
+    _ITM_getTMCloneOrIrrevocable;
+    _ITM_getTMCloneSafe;
+
+    _ITM_LB;
+    _ITM_LCD;
+    _ITM_LCE;
+    _ITM_LCF;
+    _ITM_LD;
+    _ITM_LE;
+    _ITM_LF;
+    _ITM_LM128;
+    _ITM_LM256;
+    _ITM_LM64;
+    _ITM_LU1;
+    _ITM_LU2;
+    _ITM_LU4;
+    _ITM_LU8;
+
+    _ITM_RCD;
+    _ITM_RCE;
+    _ITM_RCF;
+    _ITM_RD;
+    _ITM_RE;
+    _ITM_RF;
+    _ITM_RM128;
+    _ITM_RM256;
+    _ITM_RM64;
+    _ITM_RU1;
+    _ITM_RU2;
+    _ITM_RU4;
+    _ITM_RU8;
+    _ITM_RaRCD;
+    _ITM_RaRCE;
+    _ITM_RaRCF;
+    _ITM_RaRD;
+    _ITM_RaRE;
+    _ITM_RaRF;
+    _ITM_RaRM128;
+    _ITM_RaRM256;
+    _ITM_RaRM64;
+    _ITM_RaRU1;
+    _ITM_RaRU2;
+    _ITM_RaRU4;
+    _ITM_RaRU8;
+    _ITM_RaWCD;
+    _ITM_RaWCE;
+    _ITM_RaWCF;
+    _ITM_RaWD;
+    _ITM_RaWE;
+    _ITM_RaWF;
+    _ITM_RaWM128;
+    _ITM_RaWM256;
+    _ITM_RaWM64;
+    _ITM_RaWU1;
+    _ITM_RaWU2;
+    _ITM_RaWU4;
+    _ITM_RaWU8;
+    _ITM_RfWCD;
+    _ITM_RfWCE;
+    _ITM_RfWCF;
+    _ITM_RfWD;
+    _ITM_RfWE;
+    _ITM_RfWF;
+    _ITM_RfWM128;
+    _ITM_RfWM256;
+    _ITM_RfWM64;
+    _ITM_RfWU1;
+    _ITM_RfWU2;
+    _ITM_RfWU4;
+    _ITM_RfWU8;
+
+    _ITM_WCD;
+    _ITM_WCE;
+    _ITM_WCF;
+    _ITM_WD;
+    _ITM_WE;
+    _ITM_WF;
+    _ITM_WM128;
+    _ITM_WM256;
+    _ITM_WM64;
+    _ITM_WU1;
+    _ITM_WU2;
+    _ITM_WU4;
+    _ITM_WU8;
+    _ITM_WaRCD;
+    _ITM_WaRCE;
+    _ITM_WaRCF;
+    _ITM_WaRD;
+    _ITM_WaRE;
+    _ITM_WaRF;
+    _ITM_WaRM128;
+    _ITM_WaRM256;
+    _ITM_WaRM64;
+    _ITM_WaRU1;
+    _ITM_WaRU2;
+    _ITM_WaRU4;
+    _ITM_WaRU8;
+    _ITM_WaWCD;
+    _ITM_WaWCE;
+    _ITM_WaWCF;
+    _ITM_WaWD;
+    _ITM_WaWE;
+    _ITM_WaWF;
+    _ITM_WaWM128;
+    _ITM_WaWM256;
+    _ITM_WaWM64;
+    _ITM_WaWU1;
+    _ITM_WaWU2;
+    _ITM_WaWU4;
+    _ITM_WaWU8;
+
_ITM_memcpyRnWt; + _ITM_memcpyRnWtaR; + _ITM_memcpyRnWtaW; + _ITM_memcpyRtWn; + _ITM_memcpyRtWt; + _ITM_memcpyRtWtaR; + _ITM_memcpyRtWtaW; + _ITM_memcpyRtaRWn; + _ITM_memcpyRtaRWt; + _ITM_memcpyRtaRWtaR; + _ITM_memcpyRtaRWtaW; + _ITM_memcpyRtaWWn; + _ITM_memcpyRtaWWt; + _ITM_memcpyRtaWWtaR; + _ITM_memcpyRtaWWtaW; + _ITM_memmoveRnWt; + _ITM_memmoveRnWtaR; + _ITM_memmoveRnWtaW; + _ITM_memmoveRtWn; + _ITM_memmoveRtWt; + _ITM_memmoveRtWtaR; + _ITM_memmoveRtWtaW; + _ITM_memmoveRtaRWn; + _ITM_memmoveRtaRWt; + _ITM_memmoveRtaRWtaR; + _ITM_memmoveRtaRWtaW; + _ITM_memmoveRtaWWn; + _ITM_memmoveRtaWWt; + _ITM_memmoveRtaWWtaR; + _ITM_memmoveRtaWWtaW; + _ITM_memsetW; + _ITM_memsetWaR; + _ITM_memsetWaW; + + _ITM_malloc; + _ITM_calloc; + _ITM_free; + _ITM_dropReferences; + + _ZGTtnw?; + _ZGTtna?; + _ZGTtdlPv; + _ZGTtdaPv; + _ZGTtnw?RKSt9nothrow_t; + _ZGTtna?RKSt9nothrow_t; + _ZGTtdlPvRKSt9nothrow_t; + _ZGTtdaPvRKSt9nothrow_t; + + _ITM_cxa_allocate_exception; + _ITM_cxa_begin_catch; + _ITM_cxa_end_catch; + _ITM_cxa_throw; + + local: + *; +};