diff mbox

[libstdc++] Fix data races in basic_string implementation

Message ID CACT4Y+Yk22UTKyvAD=SCxbMU4SGjra7T1gG7aSikHLbLWxvk1g@mail.gmail.com
State New
Headers show

Commit Message

Dmitry Vyukov Sept. 1, 2015, 12:51 p.m. UTC
Hello,

The refcounted basic_string implementation contains several data races
on _M_refcount:
1. _M_is_leaked loads _M_refcount concurrently with mutations of
_M_refcount. This loads needs to be memory_order_relaxed load, as
_M_is_leaked predicate does not change under the feet.
2. _M_is_shared loads _M_refcount concurrently with mutations of
_M_refcount. This loads needs to be memory_order_acquire, as another
thread can drop _M_refcount to zero concurrently which makes the
string non-shared and so the current thread can mutate the string. We
need reads of the string in another thread (while it was shared) to
happen-before the writes to the string in this thread (now that it is
non-shared).

This patch adds __gnu_cxx::__atomic_load_dispatch function to do the
loads of _M_refcount. The function does an acquire load. Acquire is
non needed for _M_is_leaked, but for simplicity as still do acquire
(hopefully the refcounted impl will go away in future).
This patch also uses the new function to do loads of _M_refcount in
string implementation.

I did non update doc/xml/manual/concurrency_extensions.xml to document
__gnu_cxx::__atomic_load_dispatch, because I am not sure we want to
extend that api now that we have language-level atomics. If you still
want me to update it, please say how to regenerate
doc/html/manual/ext_concurrency.html.

The race was detected with ThreadSanitizer on the following program:

#define _GLIBCXX_USE_CXX11_ABI 0
#include <string>
#include <thread>
#include <iostream>
#include <chrono>

int main() {
  std::string s = "foo";
  std::thread t([=](){
    std::string x = s;
    std::cout << &x << std::endl;
  });
  std::this_thread::sleep_for(std::chrono::seconds(1));
  std::cout << &s[0] << std::endl;
  t.join();
}

$ g++ string.cc -std=c++11 -pthread -O1 -g -fsanitize=thread
$ ./a.out

WARNING: ThreadSanitizer: data race (pid=98135)
  Read of size 4 at 0x7d080000efd0 by main thread:
    #0 std::string::_Rep::_M_is_leaked() const
include/c++/6.0.0/bits/basic_string.h:2605 (a.out+0x000000401d7c)
    #1 std::string::_M_leak()
include/c++/6.0.0/bits/basic_string.h:2730 (a.out+0x000000401d7c)
    #2 std::string::operator[](unsigned long)
include/c++/6.0.0/bits/basic_string.h:3274 (a.out+0x000000401d7c)
    #3 main /tmp/string.cc:14 (a.out+0x000000401d7c)

  Previous atomic write of size 4 at 0x7d080000efd0 by thread T1:
    #0 __tsan_atomic32_fetch_add
../../../../libsanitizer/tsan/tsan_interface_atomic.cc:611
(libtsan.so.0+0x00000005811f)
    #1 __exchange_and_add include/c++/6.0.0/ext/atomicity.h:59
(a.out+0x000000401a19)
    #2 __exchange_and_add_dispatch
include/c++/6.0.0/ext/atomicity.h:92 (a.out+0x000000401a19)
    #3 std::string::_Rep::_M_dispose(std::allocator<char> const&)
include/c++/6.0.0/bits/basic_string.h:2659 (a.out+0x000000401a19)
    #4 std::basic_string<char, std::char_traits<char>,
std::allocator<char> >::~basic_string()
include/c++/6.0.0/bits/basic_string.h:2961 (a.out+0x000000401a19)
    #5 ~<lambda> /tmp/string.cc:9 (a.out+0x000000401a19)
    #6 ~_Head_base include/c++/6.0.0/tuple:102 (a.out+0x000000401a19)
    #7 ~_Tuple_impl include/c++/6.0.0/tuple:338 (a.out+0x000000401a19)
    #8 ~tuple include/c++/6.0.0/tuple:521 (a.out+0x000000401a19)
    #9 ~_Bind_simple include/c++/6.0.0/functional:1503 (a.out+0x000000401a19)
    #10 ~_Impl include/c++/6.0.0/thread:107 (a.out+0x000000401a19)
    #11 destroy<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()>
> > include/c++/6.0.0/ext/new_allocator.h:124 (a.out+0x0000004015c7)
    #12 _S_destroy<std::allocator<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()>
> >, std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()> > >
include/c++/6.0.0/bits/alloc_traits.h:236 (a.out+0x0000004015c7)
    #13 destroy<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()>
> > include/c++/6.0.0/bits/alloc_traits.h:336 (a.out+0x0000004015c7)
    #14 _M_dispose include/c++/6.0.0/bits/shared_ptr_base.h:529
(a.out+0x0000004015c7)
    #15 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release()
/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:150
(libstdc++.so.6+0x0000000b6da4)
    #16 std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count()
/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:662
(libstdc++.so.6+0x0000000b6da4)
    #17 std::__shared_ptr<std::thread::_Impl_base,
(__gnu_cxx::_Lock_policy)2>::~__shared_ptr()
/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:928
(libstdc++.so.6+0x0000000b6da4)
    #18 std::shared_ptr<std::thread::_Impl_base>::~shared_ptr()
/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr.h:93
(libstdc++.so.6+0x0000000b6da4)
    #19 execute_native_thread_routine
libstdc++-v3/src/c++11/thread.cc:79 (libstdc++.so.6+0x0000000b6da4)

  Location is heap block of size 28 at 0x7d080000efc0 allocated by main thread:
    #0 operator new(unsigned long)
../../../../libsanitizer/tsan/tsan_interceptors.cc:571
(libtsan.so.0+0x0000000255a3)
    #1 __gnu_cxx::new_allocator<char>::allocate(unsigned long, void
const*) /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/ext/new_allocator.h:104
(libstdc++.so.6+0x0000000cbaa8)
    #2 std::string::_Rep::_S_create(unsigned long, unsigned long,
std::allocator<char> const&)
/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.tcc:1051
(libstdc++.so.6+0x0000000cbaa8)
    #3 __libc_start_main <null> (libc.so.6+0x000000021ec4)

  Thread T1 (tid=98137, finished) created by main thread at:
    #0 pthread_create
../../../../libsanitizer/tsan/tsan_interceptors.cc:895
(libtsan.so.0+0x000000026d04)
    #1 __gthread_create
/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu/bits/gthr-default.h:662
(libstdc++.so.6+0x0000000b6e52)
    #2 std::thread::_M_start_thread(std::shared_ptr<std::thread::_Impl_base>,
void (*)()) libstdc++-v3/src/c++11/thread.cc:149
(libstdc++.so.6+0x0000000b6e52)
    #3 __libc_start_main <null> (libc.so.6+0x000000021ec4)

OK for trunk?

Comments

Jonathan Wakely Sept. 1, 2015, 2:27 p.m. UTC | #1
On 01/09/15 14:51 +0200, Dmitry Vyukov wrote:
>Hello,
>
>The refcounted basic_string implementation contains several data races
>on _M_refcount:
>1. _M_is_leaked loads _M_refcount concurrently with mutations of
>_M_refcount. This loads needs to be memory_order_relaxed load, as
>_M_is_leaked predicate does not change under the feet.
>2. _M_is_shared loads _M_refcount concurrently with mutations of
>_M_refcount. This loads needs to be memory_order_acquire, as another
>thread can drop _M_refcount to zero concurrently which makes the
>string non-shared and so the current thread can mutate the string. We
>need reads of the string in another thread (while it was shared) to
>happen-before the writes to the string in this thread (now that it is
>non-shared).
>
>This patch adds __gnu_cxx::__atomic_load_dispatch function to do the
>loads of _M_refcount. The function does an acquire load. Acquire is
>non needed for _M_is_leaked, but for simplicity as still do acquire
>(hopefully the refcounted impl will go away in future).

It's unlikely to go away for a long time.

>This patch also uses the new function to do loads of _M_refcount in
>string implementation.
>
>I did non update doc/xml/manual/concurrency_extensions.xml to document
>__gnu_cxx::__atomic_load_dispatch, because I am not sure we want to
>extend that api now that we have language-level atomics. If you still
>want me to update it, please say how to regenerate
>doc/html/manual/ext_concurrency.html.

I don't know if we need the new __atomic_load_dispatch function at all,
we could just use atomic loads directly e.g.

         bool
 	_M_is_leaked() const _GLIBCXX_NOEXCEPT
#if defined(__GTHREADS) && defined(_GLIBCXX_ATOMIC_BUILTINS)
+       { return __atomic_load_n(&this->_M_refcount, __ATOMIC_RELAXED) < 0; }
+#else
        { return this->_M_refcount > 0; }
+#endif

         bool
 	_M_is_shared() const _GLIBCXX_NOEXCEPT
#if defined(__GTHREADS) && defined(_GLIBCXX_ATOMIC_BUILTINS)
+        { return __atomic_load(&this->_M_refcount, __ATOMIC_ACQUIRE) > 0; }
+#else
         { return this->_M_refcount > 0; }
+#endif

The __atomic_xxx_dispatch functions check __gthread_active_p() as an
optimisation to avoid potentially expensive atomic stores, but I don't
think we need that for atomic loads here, especially the relaxed load.
We could always add the dispatch in later if we get complaints about
single-threaded performance on targets where the loads are expensive.

This doesn't fix the problem for targets that don't define
_GLIBCXX_ATOMIC_BUILTINS but I don't know how many of them there are.
We could make it work on more targets by adding a new configure check
just for __atomic_load(int*, ...), because _GLIBCXX_ATOMIC_BUILTINS
requires several builtins to support various object sizes, but here we
don't need all of that.


>The race was detected with ThreadSanitizer on the following program:
>
>#define _GLIBCXX_USE_CXX11_ABI 0
>#include <string>
>#include <thread>
>#include <iostream>
>#include <chrono>
>
>int main() {
>  std::string s = "foo";
>  std::thread t([=](){
>    std::string x = s;
>    std::cout << &x << std::endl;
>  });
>  std::this_thread::sleep_for(std::chrono::seconds(1));
>  std::cout << &s[0] << std::endl;
>  t.join();
>}
>
>$ g++ string.cc -std=c++11 -pthread -O1 -g -fsanitize=thread
>$ ./a.out
>
>WARNING: ThreadSanitizer: data race (pid=98135)
>  Read of size 4 at 0x7d080000efd0 by main thread:
>    #0 std::string::_Rep::_M_is_leaked() const
>include/c++/6.0.0/bits/basic_string.h:2605 (a.out+0x000000401d7c)
>    #1 std::string::_M_leak()
>include/c++/6.0.0/bits/basic_string.h:2730 (a.out+0x000000401d7c)
>    #2 std::string::operator[](unsigned long)
>include/c++/6.0.0/bits/basic_string.h:3274 (a.out+0x000000401d7c)
>    #3 main /tmp/string.cc:14 (a.out+0x000000401d7c)
>
>  Previous atomic write of size 4 at 0x7d080000efd0 by thread T1:
>    #0 __tsan_atomic32_fetch_add
>../../../../libsanitizer/tsan/tsan_interface_atomic.cc:611
>(libtsan.so.0+0x00000005811f)
>    #1 __exchange_and_add include/c++/6.0.0/ext/atomicity.h:59
>(a.out+0x000000401a19)
>    #2 __exchange_and_add_dispatch
>include/c++/6.0.0/ext/atomicity.h:92 (a.out+0x000000401a19)
>    #3 std::string::_Rep::_M_dispose(std::allocator<char> const&)
>include/c++/6.0.0/bits/basic_string.h:2659 (a.out+0x000000401a19)
>    #4 std::basic_string<char, std::char_traits<char>,
>std::allocator<char> >::~basic_string()
>include/c++/6.0.0/bits/basic_string.h:2961 (a.out+0x000000401a19)
>    #5 ~<lambda> /tmp/string.cc:9 (a.out+0x000000401a19)
>    #6 ~_Head_base include/c++/6.0.0/tuple:102 (a.out+0x000000401a19)
>    #7 ~_Tuple_impl include/c++/6.0.0/tuple:338 (a.out+0x000000401a19)
>    #8 ~tuple include/c++/6.0.0/tuple:521 (a.out+0x000000401a19)
>    #9 ~_Bind_simple include/c++/6.0.0/functional:1503 (a.out+0x000000401a19)
>    #10 ~_Impl include/c++/6.0.0/thread:107 (a.out+0x000000401a19)
>    #11 destroy<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()>
>> > include/c++/6.0.0/ext/new_allocator.h:124 (a.out+0x0000004015c7)
>    #12 _S_destroy<std::allocator<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()>
>> >, std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()> > >
>include/c++/6.0.0/bits/alloc_traits.h:236 (a.out+0x0000004015c7)
>    #13 destroy<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()>
>> > include/c++/6.0.0/bits/alloc_traits.h:336 (a.out+0x0000004015c7)
>    #14 _M_dispose include/c++/6.0.0/bits/shared_ptr_base.h:529
>(a.out+0x0000004015c7)
>    #15 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release()
>/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:150
>(libstdc++.so.6+0x0000000b6da4)
>    #16 std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count()
>/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:662
>(libstdc++.so.6+0x0000000b6da4)
>    #17 std::__shared_ptr<std::thread::_Impl_base,
>(__gnu_cxx::_Lock_policy)2>::~__shared_ptr()
>/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:928
>(libstdc++.so.6+0x0000000b6da4)
>    #18 std::shared_ptr<std::thread::_Impl_base>::~shared_ptr()
>/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr.h:93
>(libstdc++.so.6+0x0000000b6da4)
>    #19 execute_native_thread_routine
>libstdc++-v3/src/c++11/thread.cc:79 (libstdc++.so.6+0x0000000b6da4)
>
>  Location is heap block of size 28 at 0x7d080000efc0 allocated by main thread:
>    #0 operator new(unsigned long)
>../../../../libsanitizer/tsan/tsan_interceptors.cc:571
>(libtsan.so.0+0x0000000255a3)
>    #1 __gnu_cxx::new_allocator<char>::allocate(unsigned long, void
>const*) /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/ext/new_allocator.h:104
>(libstdc++.so.6+0x0000000cbaa8)
>    #2 std::string::_Rep::_S_create(unsigned long, unsigned long,
>std::allocator<char> const&)
>/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.tcc:1051
>(libstdc++.so.6+0x0000000cbaa8)
>    #3 __libc_start_main <null> (libc.so.6+0x000000021ec4)
>
>  Thread T1 (tid=98137, finished) created by main thread at:
>    #0 pthread_create
>../../../../libsanitizer/tsan/tsan_interceptors.cc:895
>(libtsan.so.0+0x000000026d04)
>    #1 __gthread_create
>/usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu/bits/gthr-default.h:662
>(libstdc++.so.6+0x0000000b6e52)
>    #2 std::thread::_M_start_thread(std::shared_ptr<std::thread::_Impl_base>,
>void (*)()) libstdc++-v3/src/c++11/thread.cc:149
>(libstdc++.so.6+0x0000000b6e52)
>    #3 __libc_start_main <null> (libc.so.6+0x000000021ec4)
>
>OK for trunk?

>Index: include/bits/basic_string.h
>===================================================================
>--- include/bits/basic_string.h	(revision 227363)
>+++ include/bits/basic_string.h	(working copy)
>@@ -2601,11 +2601,11 @@
>
>         bool
> 	_M_is_leaked() const _GLIBCXX_NOEXCEPT
>-        { return this->_M_refcount < 0; }
>+        { return __gnu_cxx::__atomic_load_dispatch(&this->_M_refcount) < 0; }
>
>         bool
> 	_M_is_shared() const _GLIBCXX_NOEXCEPT
>-        { return this->_M_refcount > 0; }
>+        { return __gnu_cxx::__atomic_load_dispatch(&this->_M_refcount) > 0; }
>
>         void
> 	_M_set_leaked() _GLIBCXX_NOEXCEPT
>Index: include/ext/atomicity.h
>===================================================================
>--- include/ext/atomicity.h	(revision 227363)
>+++ include/ext/atomicity.h	(working copy)
>@@ -35,6 +35,16 @@
> #include <bits/gthr.h>
> #include <bits/atomic_word.h>
>
>+// Even if the CPU doesn't need a memory barrier, we need to ensure
>+// that the compiler doesn't reorder memory accesses across the
>+// barriers.
>+#ifndef _GLIBCXX_READ_MEM_BARRIER
>+#define _GLIBCXX_READ_MEM_BARRIER __atomic_thread_fence (__ATOMIC_ACQUIRE)
>+#endif
>+#ifndef _GLIBCXX_WRITE_MEM_BARRIER
>+#define _GLIBCXX_WRITE_MEM_BARRIER __atomic_thread_fence (__ATOMIC_RELEASE)
>+#endif
>+
> namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
> {
> _GLIBCXX_BEGIN_NAMESPACE_VERSION
>@@ -50,7 +60,7 @@
>
>   static inline void
>   __atomic_add(volatile _Atomic_word* __mem, int __val)
>-  { __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL); }
>+  { __atomic_fetch_add(__mem, __val, __ATOMIC_RELEASE); }
> #else
>   _Atomic_word
>   __attribute__ ((__unused__))
>@@ -101,17 +111,27 @@
> #endif
>   }
>
>+  static inline _Atomic_word
>+  __attribute__ ((__unused__))
>+  __atomic_load_dispatch(const _Atomic_word* __mem)
>+  {
>+#ifdef __GTHREADS
>+    if (__gthread_active_p())
>+      {
>+#ifdef _GLIBCXX_ATOMIC_BUILTINS
>+        return __atomic_load_n(__mem, __ATOMIC_ACQUIRE);
>+#else
>+        // The best we can get with an old compiler.

We don't support old compilers in libstdc++ trunk, so this comment is
misleading. The fallback is needed for targets without support for all
the builtins, not for old compilers.

>+        _Atomic_word v = *(volatile _Atomic_word*)__mem;
>+        _GLIBCXX_READ_MEM_BARRIER;

If a target doesn't define _GLIBCXX_ATOMIC_BUILTINS do we know it will
define __atomic_thread_fence ?

I guess those targets should be redefining _GLIBCXX_READ_MEM_BARRIER
anyway.


>+        return v;
>+#endif
>+      }
>+#endif
>+    return *__mem;
>+  }
>+
> _GLIBCXX_END_NAMESPACE_VERSION
> } // namespace
>
>-// Even if the CPU doesn't need a memory barrier, we need to ensure
>-// that the compiler doesn't reorder memory accesses across the
>-// barriers.
>-#ifndef _GLIBCXX_READ_MEM_BARRIER
>-#define _GLIBCXX_READ_MEM_BARRIER __atomic_thread_fence (__ATOMIC_ACQUIRE)
>-#endif
>-#ifndef _GLIBCXX_WRITE_MEM_BARRIER
>-#define _GLIBCXX_WRITE_MEM_BARRIER __atomic_thread_fence (__ATOMIC_RELEASE)
>-#endif
>-
> #endif
Dmitry Vyukov Sept. 1, 2015, 2:56 p.m. UTC | #2
On Tue, Sep 1, 2015 at 4:27 PM, Jonathan Wakely <jwakely@redhat.com> wrote:
> On 01/09/15 14:51 +0200, Dmitry Vyukov wrote:
>>
>> Hello,
>>
>> The refcounted basic_string implementation contains several data races
>> on _M_refcount:
>> 1. _M_is_leaked loads _M_refcount concurrently with mutations of
>> _M_refcount. This loads needs to be memory_order_relaxed load, as
>> _M_is_leaked predicate does not change under the feet.
>> 2. _M_is_shared loads _M_refcount concurrently with mutations of
>> _M_refcount. This loads needs to be memory_order_acquire, as another
>> thread can drop _M_refcount to zero concurrently which makes the
>> string non-shared and so the current thread can mutate the string. We
>> need reads of the string in another thread (while it was shared) to
>> happen-before the writes to the string in this thread (now that it is
>> non-shared).
>>
>> This patch adds __gnu_cxx::__atomic_load_dispatch function to do the
>> loads of _M_refcount. The function does an acquire load. Acquire is
>> non needed for _M_is_leaked, but for simplicity as still do acquire
>> (hopefully the refcounted impl will go away in future).
>
>
> It's unlikely to go away for a long time.

ack

>> This patch also uses the new function to do loads of _M_refcount in
>> string implementation.
>>
>> I did non update doc/xml/manual/concurrency_extensions.xml to document
>> __gnu_cxx::__atomic_load_dispatch, because I am not sure we want to
>> extend that api now that we have language-level atomics. If you still
>> want me to update it, please say how to regenerate
>> doc/html/manual/ext_concurrency.html.
>
>
> I don't know if we need the new __atomic_load_dispatch function at all,
> we could just use atomic loads directly e.g.
>
>         bool
>         _M_is_leaked() const _GLIBCXX_NOEXCEPT
> #if defined(__GTHREADS) && defined(_GLIBCXX_ATOMIC_BUILTINS)
> +       { return __atomic_load_n(&this->_M_refcount, __ATOMIC_RELAXED) < 0;
> }
> +#else
>        { return this->_M_refcount > 0; }
> +#endif
>
>         bool
>         _M_is_shared() const _GLIBCXX_NOEXCEPT
> #if defined(__GTHREADS) && defined(_GLIBCXX_ATOMIC_BUILTINS)
> +        { return __atomic_load(&this->_M_refcount, __ATOMIC_ACQUIRE) > 0; }
> +#else
>         { return this->_M_refcount > 0; }
> +#endif

Looks better to me.
I wasn't sure why that dispatch indirection is there at all, so I did
what the current does.



> The __atomic_xxx_dispatch functions check __gthread_active_p() as an
> optimisation to avoid potentially expensive atomic stores, but I don't
> think we need that for atomic loads here, especially the relaxed load.
> We could always add the dispatch in later if we get complaints about
> single-threaded performance on targets where the loads are expensive.
>
> This doesn't fix the problem for targets that don't define
> _GLIBCXX_ATOMIC_BUILTINS but I don't know how many of them there are.
> We could make it work on more targets by adding a new configure check
> just for __atomic_load(int*, ...), because _GLIBCXX_ATOMIC_BUILTINS
> requires several builtins to support various object sizes, but here we
> don't need all of that.

I don't understand how a new gcc may not support __atomic builtins on
ints. How it is even possible? That's a portable API provided by
recent gcc's...


>> The race was detected with ThreadSanitizer on the following program:
>>
>> #define _GLIBCXX_USE_CXX11_ABI 0
>> #include <string>
>> #include <thread>
>> #include <iostream>
>> #include <chrono>
>>
>> int main() {
>>  std::string s = "foo";
>>  std::thread t([=](){
>>    std::string x = s;
>>    std::cout << &x << std::endl;
>>  });
>>  std::this_thread::sleep_for(std::chrono::seconds(1));
>>  std::cout << &s[0] << std::endl;
>>  t.join();
>> }
>>
>> $ g++ string.cc -std=c++11 -pthread -O1 -g -fsanitize=thread
>> $ ./a.out
>>
>> WARNING: ThreadSanitizer: data race (pid=98135)
>>  Read of size 4 at 0x7d080000efd0 by main thread:
>>    #0 std::string::_Rep::_M_is_leaked() const
>> include/c++/6.0.0/bits/basic_string.h:2605 (a.out+0x000000401d7c)
>>    #1 std::string::_M_leak()
>> include/c++/6.0.0/bits/basic_string.h:2730 (a.out+0x000000401d7c)
>>    #2 std::string::operator[](unsigned long)
>> include/c++/6.0.0/bits/basic_string.h:3274 (a.out+0x000000401d7c)
>>    #3 main /tmp/string.cc:14 (a.out+0x000000401d7c)
>>
>>  Previous atomic write of size 4 at 0x7d080000efd0 by thread T1:
>>    #0 __tsan_atomic32_fetch_add
>> ../../../../libsanitizer/tsan/tsan_interface_atomic.cc:611
>> (libtsan.so.0+0x00000005811f)
>>    #1 __exchange_and_add include/c++/6.0.0/ext/atomicity.h:59
>> (a.out+0x000000401a19)
>>    #2 __exchange_and_add_dispatch
>> include/c++/6.0.0/ext/atomicity.h:92 (a.out+0x000000401a19)
>>    #3 std::string::_Rep::_M_dispose(std::allocator<char> const&)
>> include/c++/6.0.0/bits/basic_string.h:2659 (a.out+0x000000401a19)
>>    #4 std::basic_string<char, std::char_traits<char>,
>> std::allocator<char> >::~basic_string()
>> include/c++/6.0.0/bits/basic_string.h:2961 (a.out+0x000000401a19)
>>    #5 ~<lambda> /tmp/string.cc:9 (a.out+0x000000401a19)
>>    #6 ~_Head_base include/c++/6.0.0/tuple:102 (a.out+0x000000401a19)
>>    #7 ~_Tuple_impl include/c++/6.0.0/tuple:338 (a.out+0x000000401a19)
>>    #8 ~tuple include/c++/6.0.0/tuple:521 (a.out+0x000000401a19)
>>    #9 ~_Bind_simple include/c++/6.0.0/functional:1503
>> (a.out+0x000000401a19)
>>    #10 ~_Impl include/c++/6.0.0/thread:107 (a.out+0x000000401a19)
>>    #11 destroy<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()>
>>>
>>> > include/c++/6.0.0/ext/new_allocator.h:124 (a.out+0x0000004015c7)
>>
>>    #12
>> _S_destroy<std::allocator<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()>
>>>
>>> >, std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()> > >
>>
>> include/c++/6.0.0/bits/alloc_traits.h:236 (a.out+0x0000004015c7)
>>    #13 destroy<std::thread::_Impl<std::_Bind_simple<main()::<lambda()>()>
>>>
>>> > include/c++/6.0.0/bits/alloc_traits.h:336 (a.out+0x0000004015c7)
>>
>>    #14 _M_dispose include/c++/6.0.0/bits/shared_ptr_base.h:529
>> (a.out+0x0000004015c7)
>>    #15 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release()
>>
>> /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:150
>> (libstdc++.so.6+0x0000000b6da4)
>>    #16 std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count()
>>
>> /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:662
>> (libstdc++.so.6+0x0000000b6da4)
>>    #17 std::__shared_ptr<std::thread::_Impl_base,
>> (__gnu_cxx::_Lock_policy)2>::~__shared_ptr()
>>
>> /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr_base.h:928
>> (libstdc++.so.6+0x0000000b6da4)
>>    #18 std::shared_ptr<std::thread::_Impl_base>::~shared_ptr()
>>
>> /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/shared_ptr.h:93
>> (libstdc++.so.6+0x0000000b6da4)
>>    #19 execute_native_thread_routine
>> libstdc++-v3/src/c++11/thread.cc:79 (libstdc++.so.6+0x0000000b6da4)
>>
>>  Location is heap block of size 28 at 0x7d080000efc0 allocated by main
>> thread:
>>    #0 operator new(unsigned long)
>> ../../../../libsanitizer/tsan/tsan_interceptors.cc:571
>> (libtsan.so.0+0x0000000255a3)
>>    #1 __gnu_cxx::new_allocator<char>::allocate(unsigned long, void
>> const*)
>> /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/ext/new_allocator.h:104
>> (libstdc++.so.6+0x0000000cbaa8)
>>    #2 std::string::_Rep::_S_create(unsigned long, unsigned long,
>> std::allocator<char> const&)
>>
>> /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/bits/basic_string.tcc:1051
>> (libstdc++.so.6+0x0000000cbaa8)
>>    #3 __libc_start_main <null> (libc.so.6+0x000000021ec4)
>>
>>  Thread T1 (tid=98137, finished) created by main thread at:
>>    #0 pthread_create
>> ../../../../libsanitizer/tsan/tsan_interceptors.cc:895
>> (libtsan.so.0+0x000000026d04)
>>    #1 __gthread_create
>>
>> /usr/local/google/home/dvyukov/src/gcc/build/x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu/bits/gthr-default.h:662
>> (libstdc++.so.6+0x0000000b6e52)
>>    #2
>> std::thread::_M_start_thread(std::shared_ptr<std::thread::_Impl_base>,
>> void (*)()) libstdc++-v3/src/c++11/thread.cc:149
>> (libstdc++.so.6+0x0000000b6e52)
>>    #3 __libc_start_main <null> (libc.so.6+0x000000021ec4)
>>
>> OK for trunk?
>
>
>> Index: include/bits/basic_string.h
>> ===================================================================
>> --- include/bits/basic_string.h (revision 227363)
>> +++ include/bits/basic_string.h (working copy)
>> @@ -2601,11 +2601,11 @@
>>
>>         bool
>>         _M_is_leaked() const _GLIBCXX_NOEXCEPT
>> -        { return this->_M_refcount < 0; }
>> +        { return __gnu_cxx::__atomic_load_dispatch(&this->_M_refcount) <
>> 0; }
>>
>>         bool
>>         _M_is_shared() const _GLIBCXX_NOEXCEPT
>> -        { return this->_M_refcount > 0; }
>> +        { return __gnu_cxx::__atomic_load_dispatch(&this->_M_refcount) >
>> 0; }
>>
>>         void
>>         _M_set_leaked() _GLIBCXX_NOEXCEPT
>> Index: include/ext/atomicity.h
>> ===================================================================
>> --- include/ext/atomicity.h     (revision 227363)
>> +++ include/ext/atomicity.h     (working copy)
>> @@ -35,6 +35,16 @@
>> #include <bits/gthr.h>
>> #include <bits/atomic_word.h>
>>
>> +// Even if the CPU doesn't need a memory barrier, we need to ensure
>> +// that the compiler doesn't reorder memory accesses across the
>> +// barriers.
>> +#ifndef _GLIBCXX_READ_MEM_BARRIER
>> +#define _GLIBCXX_READ_MEM_BARRIER __atomic_thread_fence
>> (__ATOMIC_ACQUIRE)
>> +#endif
>> +#ifndef _GLIBCXX_WRITE_MEM_BARRIER
>> +#define _GLIBCXX_WRITE_MEM_BARRIER __atomic_thread_fence
>> (__ATOMIC_RELEASE)
>> +#endif
>> +
>> namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
>> {
>> _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> @@ -50,7 +60,7 @@
>>
>>   static inline void
>>   __atomic_add(volatile _Atomic_word* __mem, int __val)
>> -  { __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL); }
>> +  { __atomic_fetch_add(__mem, __val, __ATOMIC_RELEASE); }
>> #else
>>   _Atomic_word
>>   __attribute__ ((__unused__))
>> @@ -101,17 +111,27 @@
>> #endif
>>   }
>>
>> +  static inline _Atomic_word
>> +  __attribute__ ((__unused__))
>> +  __atomic_load_dispatch(const _Atomic_word* __mem)
>> +  {
>> +#ifdef __GTHREADS
>> +    if (__gthread_active_p())
>> +      {
>> +#ifdef _GLIBCXX_ATOMIC_BUILTINS
>> +        return __atomic_load_n(__mem, __ATOMIC_ACQUIRE);
>> +#else
>> +        // The best we can get with an old compiler.
>
>
> We don't support old compilers in libstdc++ trunk, so this comment is
> misleading. The fallback is needed for targets without support for all
> the builtins, not for old compilers.
>
>> +        _Atomic_word v = *(volatile _Atomic_word*)__mem;
>> +        _GLIBCXX_READ_MEM_BARRIER;
>
>
> If a target doesn't define _GLIBCXX_ATOMIC_BUILTINS do we know it will
> define __atomic_thread_fence ?
>
> I guess those targets should be redefining _GLIBCXX_READ_MEM_BARRIER
> anyway.

Yeah, that was my understanding.
_GLIBCXX_READ_MEM_BARRIER is already intended to be a portable thing
whatever way it is implemented.


>> +        return v;
>> +#endif
>> +      }
>> +#endif
>> +    return *__mem;
>> +  }
>> +
>> _GLIBCXX_END_NAMESPACE_VERSION
>> } // namespace
>>
>> -// Even if the CPU doesn't need a memory barrier, we need to ensure
>> -// that the compiler doesn't reorder memory accesses across the
>> -// barriers.
>> -#ifndef _GLIBCXX_READ_MEM_BARRIER
>> -#define _GLIBCXX_READ_MEM_BARRIER __atomic_thread_fence
>> (__ATOMIC_ACQUIRE)
>> -#endif
>> -#ifndef _GLIBCXX_WRITE_MEM_BARRIER
>> -#define _GLIBCXX_WRITE_MEM_BARRIER __atomic_thread_fence
>> (__ATOMIC_RELEASE)
>> -#endif
>> -
>> #endif
>
>
Jonathan Wakely Sept. 1, 2015, 3:08 p.m. UTC | #3
On 01/09/15 16:56 +0200, Dmitry Vyukov wrote:
>I don't understand how a new gcc may not support __atomic builtins on
>ints. How it is even possible? That's a portable API provided by
>recent gcc's...

The built-in function is always defined, but it might expand to a call
to an external function in libatomic, and it would be a regression for
code using std::string to start requiring libatomic (although maybe it
would be necessary if it's the only way to make the code correct).

I don't know if there are any targets that define __GTHREADS and also
don't support __atomic_load(int*, ...) without libatomic. If such
targets exist then adding a new configure check that only depends on
__atomic_load(int*, ...) would mean we keep supporting those targets.

Another option would be to simply do:

         bool
         _M_is_shared() const _GLIBCXX_NOEXCEPT
#if defined(__GTHREADS)
+        { return __atomic_load(&this->_M_refcount, __ATOMIC_ACQUIRE) > 0; }
+#else
         { return this->_M_refcount > 0; }
+#endif

and see if anyone complains!
Marc Glisse Sept. 2, 2015, 10:58 a.m. UTC | #4
On Tue, 1 Sep 2015, Dmitry Vyukov wrote:

> The refcounted basic_string implementation contains several data races
> on _M_refcount:

There are several bug reports about races in basic_string in bugzilla 
(some might even have been closed as wontfix because of the new 
implementation). Does this also fix some of them?

(ChangeLog entry appears to be missing)
Dmitry Vyukov Sept. 2, 2015, 1:49 p.m. UTC | #5
On Wed, Sep 2, 2015 at 12:58 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Tue, 1 Sep 2015, Dmitry Vyukov wrote:
>
>> The refcounted basic_string implementation contains several data races
>> on _M_refcount:
>
>
> There are several bug reports about races in basic_string in bugzilla (some
> might even have been closed as wontfix because of the new implementation).
> Does this also fix some of them?

I've tried to search for "basic_string race" with all statuses:
https://gcc.gnu.org/bugzilla/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=SUSPENDED&bug_status=WAITING&bug_status=REOPENED&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=CLOSED&cf_known_to_fail_type=allwords&cf_known_to_work_type=allwords&component=libstdc%2B%2B&list_id=125385&query_format=advanced&short_desc=basic_string%20race&short_desc_type=allwordssubstr

But it does not yield anything interesting. What bugs can I reference?
Jonathan Wakely Sept. 2, 2015, 2:05 p.m. UTC | #6
On 02/09/15 15:49 +0200, Dmitry Vyukov wrote:
>On Wed, Sep 2, 2015 at 12:58 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> On Tue, 1 Sep 2015, Dmitry Vyukov wrote:
>>
>>> The refcounted basic_string implementation contains several data races
>>> on _M_refcount:
>>
>>
>> There are several bug reports about races in basic_string in bugzilla (some
>> might even have been closed as wontfix because of the new implementation).
>> Does this also fix some of them?
>
>I've tried to search for "basic_string race" with all statuses:
>https://gcc.gnu.org/bugzilla/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=SUSPENDED&bug_status=WAITING&bug_status=REOPENED&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=CLOSED&cf_known_to_fail_type=allwords&cf_known_to_work_type=allwords&component=libstdc%2B%2B&list_id=125385&query_format=advanced&short_desc=basic_string%20race&short_desc_type=allwordssubstr
>
>But it does not yield anything interesting. What bugs can I reference?

There's https://gcc.gnu.org/bugzilla/show_bug.cgi?id=21334 but I don't
think this fixes it. That's a race condition in the logic, not a
formal data race due to non-atomic operations like the problem being
fixed here.
diff mbox

Patch

Index: include/bits/basic_string.h
===================================================================
--- include/bits/basic_string.h	(revision 227363)
+++ include/bits/basic_string.h	(working copy)
@@ -2601,11 +2601,11 @@ 
 
         bool
 	_M_is_leaked() const _GLIBCXX_NOEXCEPT
-        { return this->_M_refcount < 0; }
+        { return __gnu_cxx::__atomic_load_dispatch(&this->_M_refcount) < 0; }
 
         bool
 	_M_is_shared() const _GLIBCXX_NOEXCEPT
-        { return this->_M_refcount > 0; }
+        { return __gnu_cxx::__atomic_load_dispatch(&this->_M_refcount) > 0; }
 
         void
 	_M_set_leaked() _GLIBCXX_NOEXCEPT
Index: include/ext/atomicity.h
===================================================================
--- include/ext/atomicity.h	(revision 227363)
+++ include/ext/atomicity.h	(working copy)
@@ -35,6 +35,16 @@ 
 #include <bits/gthr.h>
 #include <bits/atomic_word.h>
 
+// Even if the CPU doesn't need a memory barrier, we need to ensure
+// that the compiler doesn't reorder memory accesses across the
+// barriers.
+#ifndef _GLIBCXX_READ_MEM_BARRIER
+#define _GLIBCXX_READ_MEM_BARRIER __atomic_thread_fence (__ATOMIC_ACQUIRE)
+#endif
+#ifndef _GLIBCXX_WRITE_MEM_BARRIER
+#define _GLIBCXX_WRITE_MEM_BARRIER __atomic_thread_fence (__ATOMIC_RELEASE)
+#endif
+
 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -50,7 +60,7 @@ 
 
   static inline void
   __atomic_add(volatile _Atomic_word* __mem, int __val)
-  { __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL); }
+  { __atomic_fetch_add(__mem, __val, __ATOMIC_RELEASE); }
 #else
   _Atomic_word
   __attribute__ ((__unused__))
@@ -101,17 +111,27 @@ 
 #endif
   }
 
+  static inline _Atomic_word
+  __attribute__ ((__unused__))
+  __atomic_load_dispatch(const _Atomic_word* __mem)
+  {
+#ifdef __GTHREADS
+    if (__gthread_active_p())
+      {
+#ifdef _GLIBCXX_ATOMIC_BUILTINS
+        return __atomic_load_n(__mem, __ATOMIC_ACQUIRE);
+#else
+        // The best we can get with an old compiler.
+        _Atomic_word v = *(volatile _Atomic_word*)__mem;
+        _GLIBCXX_READ_MEM_BARRIER;
+        return v;
+#endif
+      }
+#endif
+    return *__mem;
+  }
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
 
-// Even if the CPU doesn't need a memory barrier, we need to ensure
-// that the compiler doesn't reorder memory accesses across the
-// barriers.
-#ifndef _GLIBCXX_READ_MEM_BARRIER
-#define _GLIBCXX_READ_MEM_BARRIER __atomic_thread_fence (__ATOMIC_ACQUIRE)
-#endif
-#ifndef _GLIBCXX_WRITE_MEM_BARRIER
-#define _GLIBCXX_WRITE_MEM_BARRIER __atomic_thread_fence (__ATOMIC_RELEASE)
-#endif
-
 #endif