Message ID | 20181001220831.7420-1-hjl.tools@gmail.com |
---|---|
State | New |
Headers | show |
Series | x86: Use RTM intrinsics in pthread mutex lock elision | expand |
On 01/10/2018 19:08, H.J. Lu wrote: > Since RTM intrinsics are supported in GCC 4.9, we can use them in > pthread mutex lock elision. > > * sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c): > Add -mrtm. > (CFLAGS-elision-unlock.c): Likewise. > (CFLAGS-elision-timed.c): Likewise. > (CFLAGS-elision-trylock.c): Likewise. > * sysdeps/unix/sysv/linux/x86/hle.h: Rewritten. LGTM, thanks. > --- > sysdeps/unix/sysv/linux/x86/Makefile | 4 ++ > sysdeps/unix/sysv/linux/x86/hle.h | 70 ++-------------------------- > 2 files changed, 7 insertions(+), 67 deletions(-) > > diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile > index 7dc4e61756..02ca36c6d2 100644 > --- a/sysdeps/unix/sysv/linux/x86/Makefile > +++ b/sysdeps/unix/sysv/linux/x86/Makefile > @@ -14,6 +14,10 @@ endif > ifeq ($(subdir),nptl) > libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \ > elision-trylock > +CFLAGS-elision-lock.c += -mrtm > +CFLAGS-elision-unlock.c += -mrtm > +CFLAGS-elision-timed.c += -mrtm > +CFLAGS-elision-trylock.c += -mrtm > endif > > ifeq ($(subdir),elf) > diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h > index 4a7b9e3bf7..0449026839 100644 > --- a/sysdeps/unix/sysv/linux/x86/hle.h > +++ b/sysdeps/unix/sysv/linux/x86/hle.h > @@ -1,75 +1,11 @@ > -/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers > - that do not support the intrinsics and instructions yet. */ > +/* Shared RTM header. */ > #ifndef _HLE_H > #define _HLE_H 1 > > -#ifdef __ASSEMBLER__ > +#include <x86intrin.h> Is it used in any configuration in assembly code?
On Tue, Oct 2, 2018 at 6:06 AM Adhemerval Zanella <adhemerval.zanella@linaro.org> wrote: > > > > On 01/10/2018 19:08, H.J. Lu wrote: > > Since RTM intrinsics are supported in GCC 4.9, we can use them in > > pthread mutex lock elision. > > > > * sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c): > > Add -mrtm. > > (CFLAGS-elision-unlock.c): Likewise. > > (CFLAGS-elision-timed.c): Likewise. > > (CFLAGS-elision-trylock.c): Likewise. > > * sysdeps/unix/sysv/linux/x86/hle.h: Rewritten. > > LGTM, thanks. > > > --- > > sysdeps/unix/sysv/linux/x86/Makefile | 4 ++ > > sysdeps/unix/sysv/linux/x86/hle.h | 70 ++-------------------------- > > 2 files changed, 7 insertions(+), 67 deletions(-) > > > > diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile > > index 7dc4e61756..02ca36c6d2 100644 > > --- a/sysdeps/unix/sysv/linux/x86/Makefile > > +++ b/sysdeps/unix/sysv/linux/x86/Makefile > > @@ -14,6 +14,10 @@ endif > > ifeq ($(subdir),nptl) > > libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \ > > elision-trylock > > +CFLAGS-elision-lock.c += -mrtm > > +CFLAGS-elision-unlock.c += -mrtm > > +CFLAGS-elision-timed.c += -mrtm > > +CFLAGS-elision-trylock.c += -mrtm > > endif > > > > ifeq ($(subdir),elf) > > diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h > > index 4a7b9e3bf7..0449026839 100644 > > --- a/sysdeps/unix/sysv/linux/x86/hle.h > > +++ b/sysdeps/unix/sysv/linux/x86/hle.h > > @@ -1,75 +1,11 @@ > > -/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers > > - that do not support the intrinsics and instructions yet. */ > > +/* Shared RTM header. */ > > #ifndef _HLE_H > > #define _HLE_H 1 > > > > -#ifdef __ASSEMBLER__ > > +#include <x86intrin.h> > > Is it used in any configuration in assembly code? 
No: sysdeps/unix/sysv/linux/x86/elision-lock.c:#include "hle.h" sysdeps/unix/sysv/linux/x86/elision-trylock.c:#include "hle.h" sysdeps/unix/sysv/linux/x86/elision-unlock.c:#include "hle.h" sysdeps/x86/elide.h:#include <hle.h> BTW, elide.h isn't used anywhere.
On 02/10/2018 10:49, H.J. Lu wrote: > On Tue, Oct 2, 2018 at 6:06 AM Adhemerval Zanella > <adhemerval.zanella@linaro.org> wrote: >> >> >> >> On 01/10/2018 19:08, H.J. Lu wrote: >>> Since RTM intrinsics are supported in GCC 4.9, we can use them in >>> pthread mutex lock elision. >>> >>> * sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c): >>> Add -mrtm. >>> (CFLAGS-elision-unlock.c): Likewise. >>> (CFLAGS-elision-timed.c): Likewise. >>> (CFLAGS-elision-trylock.c): Likewise. >>> * sysdeps/unix/sysv/linux/x86/hle.h: Rewritten. >> >> LGTM, thanks. >> >>> --- >>> sysdeps/unix/sysv/linux/x86/Makefile | 4 ++ >>> sysdeps/unix/sysv/linux/x86/hle.h | 70 ++-------------------------- >>> 2 files changed, 7 insertions(+), 67 deletions(-) >>> >>> diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile >>> index 7dc4e61756..02ca36c6d2 100644 >>> --- a/sysdeps/unix/sysv/linux/x86/Makefile >>> +++ b/sysdeps/unix/sysv/linux/x86/Makefile >>> @@ -14,6 +14,10 @@ endif >>> ifeq ($(subdir),nptl) >>> libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \ >>> elision-trylock >>> +CFLAGS-elision-lock.c += -mrtm >>> +CFLAGS-elision-unlock.c += -mrtm >>> +CFLAGS-elision-timed.c += -mrtm >>> +CFLAGS-elision-trylock.c += -mrtm >>> endif >>> >>> ifeq ($(subdir),elf) >>> diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h >>> index 4a7b9e3bf7..0449026839 100644 >>> --- a/sysdeps/unix/sysv/linux/x86/hle.h >>> +++ b/sysdeps/unix/sysv/linux/x86/hle.h >>> @@ -1,75 +1,11 @@ >>> -/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers >>> - that do not support the intrinsics and instructions yet. */ >>> +/* Shared RTM header. */ >>> #ifndef _HLE_H >>> #define _HLE_H 1 >>> >>> -#ifdef __ASSEMBLER__ >>> +#include <x86intrin.h> >> >> Is it used in any configuration in assembly code? 
> > No: > > sysdeps/unix/sysv/linux/x86/elision-lock.c:#include "hle.h" > sysdeps/unix/sysv/linux/x86/elision-trylock.c:#include "hle.h" > sysdeps/unix/sysv/linux/x86/elision-unlock.c:#include "hle.h" > sysdeps/x86/elide.h:#include <hle.h> > > BTW, elide.h isn't used anywhere. > It was used by HTM lock elision on pthread_rwlock_* before the new implementation (cc25c8b4c1196a8c29e9a45b1e096b99a87b7f8c). Andrew Senkevich has sent a patch [1] to re-enable HTM rwlock elision, but his own performance results seem mixed. I think we can safely remove all elide.h files now. [1] https://sourceware.org/ml/libc-alpha/2017-04/msg00067.html
diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile index 7dc4e61756..02ca36c6d2 100644 --- a/sysdeps/unix/sysv/linux/x86/Makefile +++ b/sysdeps/unix/sysv/linux/x86/Makefile @@ -14,6 +14,10 @@ endif ifeq ($(subdir),nptl) libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \ elision-trylock +CFLAGS-elision-lock.c += -mrtm +CFLAGS-elision-unlock.c += -mrtm +CFLAGS-elision-timed.c += -mrtm +CFLAGS-elision-trylock.c += -mrtm endif ifeq ($(subdir),elf) diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h index 4a7b9e3bf7..0449026839 100644 --- a/sysdeps/unix/sysv/linux/x86/hle.h +++ b/sysdeps/unix/sysv/linux/x86/hle.h @@ -1,75 +1,11 @@ -/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers - that do not support the intrinsics and instructions yet. */ +/* Shared RTM header. */ #ifndef _HLE_H #define _HLE_H 1 -#ifdef __ASSEMBLER__ +#include <x86intrin.h> -.macro XBEGIN target - .byte 0xc7,0xf8 - .long \target-1f -1: -.endm - -.macro XEND - .byte 0x0f,0x01,0xd5 -.endm - -.macro XABORT code - .byte 0xc6,0xf8,\code -.endm - -.macro XTEST - .byte 0x0f,0x01,0xd6 -.endm - -#endif - -/* Official RTM intrinsics interface matching gcc/icc, but works - on older gcc compatible compilers and binutils. - We should somehow detect if the compiler supports it, because - it may be able to generate slightly better code. 
*/ - -#define _XBEGIN_STARTED (~0u) -#define _XABORT_EXPLICIT (1 << 0) -#define _XABORT_RETRY (1 << 1) -#define _XABORT_CONFLICT (1 << 2) -#define _XABORT_CAPACITY (1 << 3) -#define _XABORT_DEBUG (1 << 4) -#define _XABORT_NESTED (1 << 5) -#define _XABORT_CODE(x) (((x) >> 24) & 0xff) - -#define _ABORT_LOCK_BUSY 0xff +#define _ABORT_LOCK_BUSY 0xff #define _ABORT_LOCK_IS_LOCKED 0xfe #define _ABORT_NESTED_TRYLOCK 0xfd -#ifndef __ASSEMBLER__ - -#define __force_inline __attribute__((__always_inline__)) inline - -static __force_inline int _xbegin(void) -{ - int ret = _XBEGIN_STARTED; - asm volatile (".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory"); - return ret; -} - -static __force_inline void _xend(void) -{ - asm volatile (".byte 0x0f,0x01,0xd5" ::: "memory"); -} - -static __force_inline void _xabort(const unsigned int status) -{ - asm volatile (".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); -} - -static __force_inline int _xtest(void) -{ - unsigned char out; - asm volatile (".byte 0x0f,0x01,0xd6 ; setnz %0" : "=r" (out) :: "memory"); - return out; -} - -#endif #endif