Patchwork __cxa_guard_acquire memory barriers

login
register
mail settings
Submitter Alan Modra
Date April 9, 2012, 12:10 a.m.
Message ID <20120409001051.GD2736@bubble.grove.modra.org>
Download mbox | patch
Permalink /patch/151381/
State New
Headers show

Comments

Alan Modra - April 9, 2012, 12:10 a.m.
Recently I was hunting for bugs in __cxa_guard_acquire/release that
might result in PR 52839.  No bugs found, but I did notice that these
functions use excessive memory barriers, no doubt from their
conversion from sync to atomic builtins.

Making the second __atomic_compare_exchange_n in __cxa_guard_acquire
__ATOMIC_RELAXED is easy to justify:  This atomic op is in a loop, the
entirety of which just attempts to modify the guard.  No other shared
memory is touched, and the loop terminates after the first atomic op.
So only the first atomic op requires a memory barrier.  That the
barrier on the first atomic op should be __ATOMIC_ACQUIRE can be seen
from the code and comments in gcc/cp/decl.c:expand_static_init.  We need an
acquire (or read) barrier so that a thread that doesn't perform the
initialization won't have stale data cached from before the guard was
set to say that the guarded static var has been successfully
initialized.  A read barrier is sufficient because no thread has
written anything to the guarded var before that point.

The atomic op in __cxa_guard_release needs to be __ATOMIC_RELEASE (a
release or write barrier) so that all writes to the guarded static var
are flushed before the guard state changes.  Similar reasoning for
__cxa_guard_abort says we need a write barrier so that intermediate or
partial writes to the guarded variable from the aborted thread are
flushed before some other thread starts the initialization.  We
wouldn't want any writes from the aborted thread corrupting a good
initialization.  Quite obviously we don't need a read barrier at these
points since only the initialization thread gets there.

Bootstrapped and regression tested on power7.  OK to apply?

2012-04-08  Alan Modra  <amodra@gmail.com>

	* libsupc++/guard.cc: Whitespace cleanup.
	(__cxa_guard_acquire): Use acquire barrier on first atomic
	compare exchange, no barriers on second.
	(__cxa_guard_abort, __cxa_guard_release): Use release barrier on
	atomic exchange.

Patch

Index: libstdc++-v3/libsupc++/guard.cc
===================================================================
--- libstdc++-v3/libsupc++/guard.cc	(revision 186130)
+++ libstdc++-v3/libsupc++/guard.cc	(working copy)
@@ -1,6 +1,6 @@ 
 // Copyright (C) 2002, 2004, 2006, 2008, 2009, 2010, 2011, 2012
 // Free Software Foundation, Inc.
-//  
+//
 // This file is part of GCC.
 //
 // GCC is free software; you can redistribute it and/or modify
@@ -50,7 +50,7 @@ 
 namespace
 {
   // A single mutex controlling all static initializations.
-  static __gnu_cxx::__recursive_mutex* static_mutex;  
+  static __gnu_cxx::__recursive_mutex* static_mutex;
 
   typedef char fake_recursive_mutex[sizeof(__gnu_cxx::__recursive_mutex)]
   __attribute__ ((aligned(__alignof__(__gnu_cxx::__recursive_mutex))));
@@ -87,7 +87,7 @@ 
 namespace
 {
   // A single condition variable controlling all static initializations.
-  static __gnu_cxx::__cond* static_cond;  
+  static __gnu_cxx::__cond* static_cond;
 
   // using a fake type to avoid initializing a static class.
   typedef char fake_cond_t[sizeof(__gnu_cxx::__cond)]
@@ -179,7 +179,7 @@ 
 //  | _GLIBCXX_GUARD_WAITING_BIT) and some other threads are waiting until
 //				  it is initialized.
 
-namespace __cxxabiv1 
+namespace __cxxabiv1
 {
 #ifdef _GLIBCXX_USE_FUTEX
   namespace
@@ -229,7 +229,7 @@ 
   }
 
   extern "C"
-  int __cxa_guard_acquire (__guard *g) 
+  int __cxa_guard_acquire (__guard *g)
   {
 #ifdef __GTHREADS
     // If the target can reorder loads, we need to insert a read memory
@@ -252,26 +252,26 @@ 
 	while (1)
 	  {
 	    if (__atomic_compare_exchange_n(gi, &expected, pending_bit, false,
-					    __ATOMIC_ACQ_REL,
+					    __ATOMIC_ACQUIRE,
 					    __ATOMIC_RELAXED))
 	      {
 		// This thread should do the initialization.
 		return 1;
 	      }
-	      
+
 	    if (expected == guard_bit)
 	      {
 		// Already initialized.
-		return 0;	
+		return 0;
 	      }
 	     if (expected == pending_bit)
 	       {
 		 int newv = expected | waiting_bit;
 		 if (!__atomic_compare_exchange_n(gi, &expected, newv, false,
-						  __ATOMIC_ACQ_REL, 
+						  __ATOMIC_RELAXED,
 						  __ATOMIC_RELAXED))
 		   continue;
-		 
+
 		 expected = newv;
 	       }
 
@@ -331,7 +331,7 @@ 
       {
 	int *gi = (int *) (void *) g;
 	const int waiting_bit = _GLIBCXX_GUARD_WAITING_BIT;
-	int old = __atomic_exchange_n (gi, 0, __ATOMIC_ACQ_REL);
+	int old = __atomic_exchange_n (gi, 0, __ATOMIC_RELEASE);
 
 	if ((old & waiting_bit) != 0)
 	  syscall (SYS_futex, gi, _GLIBCXX_FUTEX_WAKE, INT_MAX);
@@ -339,16 +339,16 @@ 
       }
 #elif defined(__GTHREAD_HAS_COND)
     if (__gthread_active_p())
-      {	
+      {
 	mutex_wrapper mw;
 
 	set_init_in_progress_flag(g, 0);
 
 	// If we abort, we still need to wake up all other threads waiting for
 	// the condition variable.
-        get_static_cond().broadcast();
+	get_static_cond().broadcast();
 	return;
-      }	
+      }
 #endif
 
     set_init_in_progress_flag(g, 0);
@@ -371,7 +371,7 @@ 
 	int *gi = (int *) (void *) g;
 	const int guard_bit = _GLIBCXX_GUARD_BIT;
 	const int waiting_bit = _GLIBCXX_GUARD_WAITING_BIT;
-	int old = __atomic_exchange_n (gi, guard_bit, __ATOMIC_ACQ_REL);
+	int old = __atomic_exchange_n (gi, guard_bit, __ATOMIC_RELEASE);
 
 	if ((old & waiting_bit) != 0)
 	  syscall (SYS_futex, gi, _GLIBCXX_FUTEX_WAKE, INT_MAX);
@@ -385,9 +385,9 @@ 
 	set_init_in_progress_flag(g, 0);
 	_GLIBCXX_GUARD_SET_AND_RELEASE(g);
 
-        get_static_cond().broadcast();
+	get_static_cond().broadcast();
 	return;
-      }	
+      }
 #endif
 
     set_init_in_progress_flag(g, 0);