===================================================================
@@ -91,11 +91,13 @@
uint32_t
GTM::gtm_transaction::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
{
+ static const _ITM_transactionId_t tid_block_size = 1 << 16;
+
gtm_transaction *tx;
gtm_dispatch *disp;
uint32_t ret;
- setup_gtm_thr ();
+ gtm_thread *thr = setup_gtm_thr ();
tx = new gtm_transaction;
@@ -103,13 +105,25 @@
tx->prev = gtm_tx();
if (tx->prev)
tx->nesting = tx->prev->nesting + 1;
+
+ // As long as we have not exhausted a previously allocated block of TIDs,
+ // we can avoid an atomic operation on a shared cacheline.
+ if (thr->local_tid & (tid_block_size - 1))
+ tx->id = thr->local_tid++;
+ else
+ {
#ifdef HAVE_64BIT_SYNC_BUILTINS
- tx->id = __sync_add_and_fetch (&global_tid, 1);
+ tx->id = __sync_add_and_fetch (&global_tid, tid_block_size);
+ thr->local_tid = tx->id + 1;
#else
- pthread_mutex_lock (&global_tid_lock);
- tx->id = ++global_tid;
- pthread_mutex_unlock (&global_tid_lock);
+ pthread_mutex_lock (&global_tid_lock);
+ global_tid += tid_block_size;
+ tx->id = global_tid;
+ thr->local_tid = tx->id + 1;
+ pthread_mutex_unlock (&global_tid_lock);
#endif
+ }
+
tx->jb = *jb;
set_gtm_tx (tx);
===================================================================
@@ -65,10 +65,15 @@
return r;
}
-static inline void setup_gtm_thr(void)
+static inline struct gtm_thread *setup_gtm_thr(void)
{
- if (gtm_thr() == NULL)
- asm volatile (SEG_WRITE(10) : : "r"(&_gtm_thr));
+ gtm_thread *thr = gtm_thr();
+ if (thr == NULL)
+ {
+ thr = &_gtm_thr;
+ asm volatile (SEG_WRITE(10) : : "r"(thr));
+ }
+ return thr;
}
static inline struct gtm_transaction * gtm_tx(void)
===================================================================
@@ -50,6 +50,12 @@
void *free_tx[MAX_FREE_TX];
unsigned free_tx_idx, free_tx_count;
+ // In order to reduce cacheline contention on global_tid during
+ // beginTransaction, we allocate a block of 2**N ids to the thread
+ // all at once. This number is the next value to be allocated from
+ // the block, or 0 % 2**N if no such block is allocated.
+ _ITM_transactionId_t local_tid;
+
// The value returned by _ITM_getThreadnum to identify this thread.
// ??? At present, this is densely allocated beginning with 1 and
// we don't bother filling in this value until it is requested.
@@ -67,7 +73,7 @@
#ifndef HAVE_ARCH_GTM_THREAD
// If the target does not provide optimized access to the thread-local
// data, simply access the TLS variable defined above.
-static inline void setup_gtm_thr() { }
+static inline gtm_thread *setup_gtm_thr() { return &_gtm_thr; }
static inline gtm_thread *gtm_thr() { return &_gtm_thr; }
#endif