Message ID | alpine.DEB.2.21.1807251350560.26789@stedding.saclay.inria.fr |
---|---|
State | New |
Headers | show |
Series | optimize std::vector move assignment | expand |
On 25/07/18 14:38 +0200, Marc Glisse wrote: >Hello, > >I talked about this last year >(https://gcc.gnu.org/ml/gcc-patches/2017-04/msg01063.html and thread), >this tweaks std::vector move assignment to help gcc generate better >code for it. Ah yes, thank for revisiting it. >For this code > >#include <vector> >#include <utility> >typedef std::vector<int> V; >void f(V&a,V&b){a=std::move(b);} > >with -O2 -fdump-tree-optimized on powerpc64le-unknown-linux-gnu, we >currently have > > _5 = MEM[(int * &)a_3(D)]; > MEM[(int * &)a_3(D)] = 0B; > MEM[(int * &)a_3(D) + 8] = 0B; > MEM[(int * &)a_3(D) + 16] = 0B; > _6 = MEM[(int * &)b_2(D)]; > MEM[(int * &)a_3(D)] = _6; > MEM[(int * &)b_2(D)] = 0B; > _7 = MEM[(int * &)a_3(D) + 8]; > _8 = MEM[(int * &)b_2(D) + 8]; > MEM[(int * &)a_3(D) + 8] = _8; > MEM[(int * &)b_2(D) + 8] = _7; > _9 = MEM[(int * &)a_3(D) + 16]; > _10 = MEM[(int * &)b_2(D) + 16]; > MEM[(int * &)a_3(D) + 16] = _10; > MEM[(int * &)b_2(D) + 16] = _9; > if (_5 != 0B) > >with the patch, we go down to > > _3 = MEM[(const struct _Vector_impl_data &)a_4(D)]._M_start; > _5 = MEM[(const struct _Vector_impl_data &)b_2(D)]._M_start; > MEM[(struct _Vector_impl_data *)a_4(D)]._M_start = _5; > _6 = MEM[(const struct _Vector_impl_data &)b_2(D)]._M_finish; > MEM[(struct _Vector_impl_data *)a_4(D)]._M_finish = _6; > _7 = MEM[(const struct _Vector_impl_data &)b_2(D)]._M_end_of_storage; > MEM[(struct _Vector_impl_data *)a_4(D)]._M_end_of_storage = _7; > MEM[(struct _Vector_impl_data *)b_2(D)]._M_start = 0B; > MEM[(struct _Vector_impl_data *)b_2(D)]._M_finish = 0B; > MEM[(struct _Vector_impl_data *)b_2(D)]._M_end_of_storage = 0B; > if (_3 != 0B) > >removing 2 reads and 3 writes. At -O3 we also get more vectorization. > >The main idea is to give the compiler more type information. >Currently, the only type information the compiler cares about is the >type used for a memory read/write. Using std::swap as before, the >reads/writes are done on int&. Doing it directly, they are done on >_Vector_impl_data::_M_start, a more precise information. Maybe some >day the compiler will get stricter and be able to deduce the same >information, but not yet. > >The second point is to rotate { new, old, tmp } in an order that's >simpler for the compiler. I was going to remove the swaps and use >_M_copy_data directly, but since doing the swaps in a different order >works... > >_M_copy_data is not really needed, we could add a defaulted assignment >operator, or remove the move constructor (and call a new _M_clear() >from the 2 users), etc. However, it seemed the least intrusive, the >least likely to have weird consequences. Yes, the alternative would be: --- a/libstdc++-v3/include/bits/stl_vector.h +++ b/libstdc++-v3/include/bits/stl_vector.h @@ -100,14 +100,17 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER : _M_start(__x._M_start), _M_finish(__x._M_finish), _M_end_of_storage(__x._M_end_of_storage) { __x._M_start = __x._M_finish = __x._M_end_of_storage = pointer(); } + + _Vector_impl_data(const _Vector_impl_data&) = default; + _Vector_impl_data& oeprator=(const _Vector_impl_data&) = default; #endif void _M_swap_data(_Vector_impl_data& __x) _GLIBCXX_NOEXCEPT { - std::swap(_M_start, __x._M_start); - std::swap(_M_finish, __x._M_finish); - std::swap(_M_end_of_storage, __x._M_end_of_storage); + _Vector_impl_data __tmp = *this; + *this = __x; + __x = __tmp; } }; Your _M_copy_data seems fine. It avoids unintentional copies, because the copy constructor and copy assignment operator remain deleted (at least in C++11). >I didn't add a testcase because I don't see any dg-final >scan-tree-dump in the libstdc++ testsuite. The closest would be >g++.dg/pr83239.C, g++.dg/vect/pr64410.cc, >g++.dg/vect/pr33426-ivdep-4.cc that include <vector>, but from >previous experience (already with vector), adding libstdc++ testcases >to the C++ testsuite is not very popular. Yes, C++ tests using std::vector are sometimes a bit fragile. I don't see any reason we can't use scan-tree-dump in the libstdc++ testsuite, if you wanted to add one. We do have other dg-final tests. >Index: libstdc++-v3/include/bits/stl_vector.h >=================================================================== >--- libstdc++-v3/include/bits/stl_vector.h (revision 262948) >+++ libstdc++-v3/include/bits/stl_vector.h (working copy) >@@ -96,25 +96,36 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > { } > > #if __cplusplus >= 201103L > _Vector_impl_data(_Vector_impl_data&& __x) noexcept > : _M_start(__x._M_start), _M_finish(__x._M_finish), > _M_end_of_storage(__x._M_end_of_storage) > { __x._M_start = __x._M_finish = __x._M_end_of_storage = pointer(); } > #endif > > void >+ _M_copy_data(_Vector_impl_data const& __x) _GLIBCXX_NOEXCEPT >+ { >+ _M_start = __x._M_start; >+ _M_finish = __x._M_finish; >+ _M_end_of_storage = __x._M_end_of_storage; >+ } >+ >+ void > _M_swap_data(_Vector_impl_data& __x) _GLIBCXX_NOEXCEPT > { >- std::swap(_M_start, __x._M_start); >- std::swap(_M_finish, __x._M_finish); >- std::swap(_M_end_of_storage, __x._M_end_of_storage); >+ // Do not use std::swap(_M_start, __x._M_start), etc as it looses s/looses/loses/ OK with that spelling fix, thanks!
On Wed, 25 Jul 2018, Jonathan Wakely wrote: >> _M_copy_data is not really needed, we could add a defaulted assignment >> operator, or remove the move constructor (and call a new _M_clear() from >> the 2 users), etc. However, it seemed the least intrusive, the least likely >> to have weird consequences. > > Yes, the alternative would be: > > --- a/libstdc++-v3/include/bits/stl_vector.h > +++ b/libstdc++-v3/include/bits/stl_vector.h > @@ -100,14 +100,17 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > : _M_start(__x._M_start), _M_finish(__x._M_finish), > _M_end_of_storage(__x._M_end_of_storage) > { __x._M_start = __x._M_finish = __x._M_end_of_storage = pointer(); } > + > + _Vector_impl_data(const _Vector_impl_data&) = default; > + _Vector_impl_data& oeprator=(const _Vector_impl_data&) = default; > #endif > > void > _M_swap_data(_Vector_impl_data& __x) _GLIBCXX_NOEXCEPT > { > - std::swap(_M_start, __x._M_start); > - std::swap(_M_finish, __x._M_finish); > - std::swap(_M_end_of_storage, __x._M_end_of_storage); > + _Vector_impl_data __tmp = *this; > + *this = __x; > + __x = __tmp; Or just std::swap(*this, __x). > } > }; > > Your _M_copy_data seems fine. It avoids unintentional copies, because > the copy constructor and copy assignment operator remain deleted (at > least in C++11). > >> I didn't add a testcase because I don't see any dg-final scan-tree-dump in >> the libstdc++ testsuite. The closest would be g++.dg/pr83239.C, >> g++.dg/vect/pr64410.cc, g++.dg/vect/pr33426-ivdep-4.cc that include >> <vector>, but from previous experience (already with vector), adding >> libstdc++ testcases to the C++ testsuite is not very popular. > > Yes, C++ tests using std::vector are sometimes a bit fragile. > > I don't see any reason we can't use scan-tree-dump in the libstdc++ > testsuite, if you wanted to add one. We do have other dg-final tests. The others only test for the presence of some name in assembly. But I may try it later.
Index: libstdc++-v3/include/bits/stl_vector.h =================================================================== --- libstdc++-v3/include/bits/stl_vector.h (revision 262948) +++ libstdc++-v3/include/bits/stl_vector.h (working copy) @@ -96,25 +96,36 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER { } #if __cplusplus >= 201103L _Vector_impl_data(_Vector_impl_data&& __x) noexcept : _M_start(__x._M_start), _M_finish(__x._M_finish), _M_end_of_storage(__x._M_end_of_storage) { __x._M_start = __x._M_finish = __x._M_end_of_storage = pointer(); } #endif void + _M_copy_data(_Vector_impl_data const& __x) _GLIBCXX_NOEXCEPT + { + _M_start = __x._M_start; + _M_finish = __x._M_finish; + _M_end_of_storage = __x._M_end_of_storage; + } + + void _M_swap_data(_Vector_impl_data& __x) _GLIBCXX_NOEXCEPT { - std::swap(_M_start, __x._M_start); - std::swap(_M_finish, __x._M_finish); - std::swap(_M_end_of_storage, __x._M_end_of_storage); + // Do not use std::swap(_M_start, __x._M_start), etc as it looses + // information used by TBAA. + _Vector_impl_data __tmp; + __tmp._M_copy_data(*this); + _M_copy_data(__x); + __x._M_copy_data(__tmp); } }; struct _Vector_impl : public _Tp_alloc_type, public _Vector_impl_data { _Vector_impl() _GLIBCXX_NOEXCEPT_IF( noexcept(_Tp_alloc_type()) ) : _Tp_alloc_type() { } @@ -1724,22 +1735,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER #if __cplusplus >= 201103L private: // Constant-time move assignment when source object's memory can be // moved, either because the source's allocator will move too // or because the allocators are equal. void _M_move_assign(vector&& __x, true_type) noexcept { vector __tmp(get_allocator()); - this->_M_impl._M_swap_data(__tmp._M_impl); this->_M_impl._M_swap_data(__x._M_impl); + __tmp._M_impl._M_swap_data(__x._M_impl); std::__alloc_on_move(_M_get_Tp_allocator(), __x._M_get_Tp_allocator()); } // Do move assignment when it might not be possible to move source // object's memory, resulting in a linear-time operation. void _M_move_assign(vector&& __x, false_type) { if (__x._M_get_Tp_allocator() == this->_M_get_Tp_allocator()) _M_move_assign(std::move(__x), true_type());