| Message ID | 20161109212640.2552-1-suokkos@gmail.com |
|---|---|
| State | New |
| Headers | show |
I missed this patch until today and have only just seen it, sorry.

On 09/11/16 23:26 +0200, Pauli wrote:
>Atomic reference counting generates pessimistic code on platforms where
>builtin atomics could optimize code for the following branch with a
>subtract instruction.

Which targets are affected?

>To allow better code generation, a compile-time constant addition can
>be checked for a negative value. Those cases can then be better
>optimized with the __atomic_fetch_sub builtin.

OK, that makes sense.

>Extra branch checking for the presence of threads in the application
>generates a lot of extra instructions, when builtin atomics often
>generate very few instructions.

I'd like to see more justification for this. My understanding was that
the __gthread_active_p() check was cheap in many cases, and that things
like std::string reference counting in single-threaded applications
benefit from avoiding the atomics. I don't feel comfortable making this
change without data.

>Can I assume that __builtin_constant_p is available in all compilers
>where _GLIBCXX_ATOMIC_BUILTINS is available?

Yes, that's fine. It's always available.

>--- a/libstdc++-v3/include/ext/atomicity.h
>+++ b/libstdc++-v3/include/ext/atomicity.h
>@@ -35,6 +35,14 @@
> #include <bits/gthr.h>
> #include <bits/atomic_word.h>
>
>+#if defined(__GCC_ATOMIC_INT_LOCK_FREE)

Isn't this always defined?

>+#define _GLIBCXX_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE
>+#else
>+#define _GLIBCXX_INT_LOCK_FREE 2

Why assume always lock-free if the macro isn't defined?

>+#endif
>+#define _GLIBCXX_ATOMIC_INLINE \
>+  (_GLIBCXX_ATOMIC_BUILTINS && _GLIBCXX_INT_LOCK_FREE == 2)
>+
> namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
> {
> _GLIBCXX_BEGIN_NAMESPACE_VERSION
diff --git a/libstdc++-v3/include/ext/atomicity.h b/libstdc++-v3/include/ext/atomicity.h
index 0fcb390..ecf5a9d 100644
--- a/libstdc++-v3/include/ext/atomicity.h
+++ b/libstdc++-v3/include/ext/atomicity.h
@@ -35,6 +35,14 @@
 #include <bits/gthr.h>
 #include <bits/atomic_word.h>

+#if defined(__GCC_ATOMIC_INT_LOCK_FREE)
+#define _GLIBCXX_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE
+#else
+#define _GLIBCXX_INT_LOCK_FREE 2
+#endif
+#define _GLIBCXX_ATOMIC_INLINE \
+  (_GLIBCXX_ATOMIC_BUILTINS && _GLIBCXX_INT_LOCK_FREE == 2)
+
 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -46,11 +54,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifdef _GLIBCXX_ATOMIC_BUILTINS
   static inline _Atomic_word
   __exchange_and_add(volatile _Atomic_word* __mem, int __val)
-  { return __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL); }
+  {
+    if (__builtin_constant_p(__val) && __val < 0)
+      return __atomic_fetch_sub(__mem, 0 - __val, __ATOMIC_ACQ_REL);
+    else
+      return __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL);
+  }

   static inline void
   __atomic_add(volatile _Atomic_word* __mem, int __val)
-  { __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL); }
+  { __exchange_and_add(__mem, __val); }
 #else
   _Atomic_word
   __attribute__ ((__unused__))
@@ -78,7 +91,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __exchange_and_add_dispatch(_Atomic_word* __mem, int __val)
   {
 #ifdef __GTHREADS
-    if (__gthread_active_p())
+    if (_GLIBCXX_ATOMIC_INLINE || __gthread_active_p())
       return __exchange_and_add(__mem, __val);
     else
       return __exchange_and_add_single(__mem, __val);
@@ -92,7 +105,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __atomic_add_dispatch(_Atomic_word* __mem, int __val)
   {
 #ifdef __GTHREADS
-    if (__gthread_active_p())
+    if (_GLIBCXX_ATOMIC_INLINE || __gthread_active_p())
       __atomic_add(__mem, __val);
     else
       __atomic_add_single(__mem, __val);
@@ -104,6 +117,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace

+#undef _GLIBCXX_INT_LOCK_FREE
+#undef _GLIBCXX_ATOMIC_INLINE
+
 // Even if the CPU doesn't need a memory barrier, we need to ensure
 // that the compiler doesn't reorder memory accesses across the
 // barriers.