diff mbox

[roland/vdso_clock_gettime] x86: Clean up __vdso_clock_gettime variable.

Message ID 20150129005747.3DAE82C3A92@topped-with-meat.com
State New
Headers show

Commit Message

Roland McGrath Jan. 29, 2015, 12:57 a.m. UTC
__vdso_clock_gettime is never actually used outside libc proper.  It
appeared to be if one grep'd the source, but the only apparent use was in
dead code in x86_64/pthread_cond_timedwait.S; that code is never used in
any configuration any more, because __ASSUME_FUTEX_CLOCK_REALTIME is
defined unconditionally.

The part of this patch that appears substantial is just removing that dead
assembly code.  In fact, it does not change the code that gets assembled at
all.

On both x86_64-linux-gnu and i686-linux-gnu I tested that check-abi has no
regressions and that there are no significant changes to generated code.
The only changes at all are places that were suboptimally using a GOT slot
for __vdso_clock_gettime because the declaration in scope wasn't marked as
hidden.  

This only came about because of the highly suspicious situation that
libc_hidden_proto was put next to the definition site rather than in the
header file after the declaration.  Hackers and reviewers should raise a
red flag for any case of *hidden_proto appearing anywhere but in a header
file.

Assuming no objections, I'll commit this as soon as the freeze is lifted.
I think it would be safe and probably even wise to do it for 2.21, if
Carlos concurs.  This is new code added since 2.20, and without this change
it both is trivially suboptimal and bloats .dynsym with a wholly
unnecessary symbol (but a GLIBC_PRIVATE one, so it's not an ABI issue).


Thanks,
Roland


2015-01-28  Roland McGrath  <roland@hack.frob.com>

	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Remove all
	code under [!__ASSUME_FUTEX_CLOCK_REALTIME], since that is defined
	unconditionally nowadays.  This included the only reference to
	__vdso_clock_gettime that appears outside libc proper.
	* sysdeps/unix/sysv/linux/x86_64/Versions (libc: GLIBC_PRIVATE):
	Remove version set (containing only __vdso_clock_gettime).
	* sysdeps/unix/sysv/linux/x86/libc-vdso.h (__vdso_clock_gettime):
	Add attribute_hidden.
	* sysdeps/unix/sysv/linux/i386/init-first.c (__vdso_clock_gettime):
	Likewise.  Drop __attribute__ ((nocommon)), libc_hidden_proto, and
	libc_hidden_data_def.
	* sysdeps/unix/sysv/linux/x86_64/init-first.c: Likewise.
	* sysdeps/unix/sysv/linux/x86_64/x32/init-first.c: Likewise.

Comments

Adhemerval Zanella Jan. 29, 2015, 11:18 a.m. UTC | #1
On 28-01-2015 22:57, Roland McGrath wrote:
> __vdso_clock_gettime is never actually used outside libc proper.  It
> appeared to be if one grep'd the source, but the only apparent use was in
> dead code in x86_64/pthread_cond_timedwait.S; that code is never used in
> any configuration any more, because __ASSUME_FUTEX_CLOCK_REALTIME is
> defined unconditionally.
>
> The part of this patch that appears substantial is just removing that dead
> assembly code.  In fact, it does not change the code that gets assembled at
> all.
>
> On both x86_64-linux-gnu and i686-linux-gnu I tested that check-abi has no
> regressions and that there are no significant changes to generated code.
> The only changes at all are places that were suboptimally using a GOT slot
> for __vdso_clock_gettime because the declaration in scope wasn't marked as
> hidden.  
>
> This only came about because of the highly suspicious situation that
> libc_hidden_proto was put next to the definition site rather than in the
> header file after the declaration.  Hackers and reviewers should raise a
> red flag for any case of *hidden_proto appearing anywhere but in a header
> file.
>
> Assuming no objections, I'll commit this as soon as the freeze is lifted.
> I think it would be safe and probably even wise to do it for 2.21, if
> Carlos concurs.  This is new code added since 2.20, and without this change
> it both is trivially suboptimal and bloats .dynsym with a wholly
> unnecessary symbol (but a GLIBC_PRIVATE one, so it's not an ABI issue).

Since you are modifying the code, which are the advantages and constrains to
x86_64/i386 carry specific assembly implementation for pthread_cond_* 
function? What prevent them to use the C default code? 

>
>
> Thanks,
> Roland
>
>
> 2015-01-28  Roland McGrath  <roland@hack.frob.com>
>
> 	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Remove all
> 	code under [!__ASSUME_FUTEX_CLOCK_REALTIME], since that is defined
> 	unconditionally nowadays.  This included the only reference to
> 	__vdso_clock_gettime that appears outside libc proper.
> 	* sysdeps/unix/sysv/linux/x86_64/Versions (libc: GLIBC_PRIVATE):
> 	Remove version set (containing only __vdso_clock_gettime).
> 	* sysdeps/unix/sysv/linux/x86/libc-vdso.h (__vdso_clock_gettime):
> 	Add attribute_hidden.
> 	* sysdeps/unix/sysv/linux/i386/init-first.c (__vdso_clock_gettime):
> 	Likewise.  Drop __attribute__ ((nocommon)), libc_hidden_proto, and
> 	libc_hidden_data_def.
> 	* sysdeps/unix/sysv/linux/x86_64/init-first.c: Likewise.
> 	* sysdeps/unix/sysv/linux/x86_64/x32/init-first.c: Likewise.
>
> --- a/sysdeps/unix/sysv/linux/i386/init-first.c
> +++ b/sysdeps/unix/sysv/linux/i386/init-first.c
> @@ -23,9 +23,7 @@
>  # include <libc-vdso.h>
>
>  long int (*__vdso_clock_gettime) (clockid_t, struct timespec *)
> -  __attribute__ ((nocommon));
> -libc_hidden_proto (__vdso_clock_gettime)
> -libc_hidden_data_def (__vdso_clock_gettime)
> +  attribute_hidden;
>
>  static long int
>  clock_gettime_syscall (clockid_t id, struct timespec *tp)
> --- a/sysdeps/unix/sysv/linux/x86/libc-vdso.h
> +++ b/sysdeps/unix/sysv/linux/x86/libc-vdso.h
> @@ -24,7 +24,8 @@
>
>  #ifdef SHARED
>
> -extern long int (*__vdso_clock_gettime) (clockid_t, struct timespec *);
> +extern long int (*__vdso_clock_gettime) (clockid_t, struct timespec *)
> +  attribute_hidden;
>
>  #endif
>
> --- a/sysdeps/unix/sysv/linux/x86_64/Versions
> +++ b/sysdeps/unix/sysv/linux/x86_64/Versions
> @@ -6,9 +6,6 @@ libc {
>
>      modify_ldt;
>    }
> -  GLIBC_PRIVATE {
> -    __vdso_clock_gettime;
> -  }
>  }
>
>  librt {
> --- a/sysdeps/unix/sysv/linux/x86_64/init-first.c
> +++ b/sysdeps/unix/sysv/linux/x86_64/init-first.c
> @@ -23,10 +23,7 @@
>  # include <libc-vdso.h>
>
>  long int (*__vdso_clock_gettime) (clockid_t, struct timespec *)
> -  __attribute__ ((nocommon));
> -libc_hidden_proto (__vdso_clock_gettime)
> -libc_hidden_data_def (__vdso_clock_gettime)
> -
> +  attribute_hidden;
>  long int (*__vdso_getcpu) (unsigned *, unsigned *, void *) attribute_hidden;
>
>  extern long int __syscall_clock_gettime (clockid_t, struct timespec *);
> --- a/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
> +++ b/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
> @@ -59,11 +59,7 @@ __pthread_cond_timedwait:
>  	pushq	%r15
>  	cfi_adjust_cfa_offset(8)
>  	cfi_rel_offset(%r15, 0)
> -#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
> -# define FRAME_SIZE (32+8)
> -#else
> -# define FRAME_SIZE (48+8)
> -#endif
> +#define FRAME_SIZE (32+8)
>  	subq	$FRAME_SIZE, %rsp
>  	cfi_adjust_cfa_offset(FRAME_SIZE)
>  	cfi_remember_state
> @@ -105,15 +101,6 @@ __pthread_cond_timedwait:
>  22:
>  	xorb	%r15b, %r15b
>
> -#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
> -#  ifdef PIC
> -	cmpl	$0, __have_futex_clock_realtime(%rip)
> -#  else
> -	cmpl	$0, __have_futex_clock_realtime
> -#  endif
> -	je	.Lreltmo
> -#endif
> -
>  	/* Get internal lock.  */
>  	movl	$1, %esi
>  	xorl	%eax, %eax
> @@ -440,204 +427,6 @@ __pthread_cond_timedwait:
>  47:	movq	(%rsp), %rax
>  	jmp	48b
>
> -
> -#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
> -.Lreltmo:
> -	/* Get internal lock.  */
> -	movl	$1, %esi
> -	xorl	%eax, %eax
> -	LOCK
> -# if cond_lock == 0
> -	cmpxchgl %esi, (%rdi)
> -# else
> -	cmpxchgl %esi, cond_lock(%rdi)
> -# endif
> -	jnz	1f
> -
> -	/* Unlock the mutex.  */
> -2:	movq	16(%rsp), %rdi
> -	xorl	%esi, %esi
> -	callq	__pthread_mutex_unlock_usercnt
> -
> -	testl	%eax, %eax
> -	jne	46b
> -
> -	movq	8(%rsp), %rdi
> -	incq	total_seq(%rdi)
> -	incl	cond_futex(%rdi)
> -	addl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
> -
> -	/* Get and store current wakeup_seq value.  */
> -	movq	8(%rsp), %rdi
> -	movq	wakeup_seq(%rdi), %r9
> -	movl	broadcast_seq(%rdi), %edx
> -	movq	%r9, 24(%rsp)
> -	movl	%edx, 4(%rsp)
> -
> -	/* Get the current time.  */
> -8:
> -# ifdef __NR_clock_gettime
> -	/* Get the clock number.  Note that the field in the condvar
> -	   structure stores the number minus 1.  */
> -	movq	8(%rsp), %rdi
> -	movl	cond_nwaiters(%rdi), %edi
> -	andl	$((1 << nwaiters_shift) - 1), %edi
> -	/* Only clocks 0 and 1 are allowed so far.  Both are handled in the
> -	   kernel.  */
> -	leaq	32(%rsp), %rsi
> -#  ifdef SHARED
> -	mov	__vdso_clock_gettime@GOTPCREL(%rip), %RAX_LP
> -	mov	(%rax), %RAX_LP
> -	PTR_DEMANGLE (%RAX_LP)
> -	call	*%rax
> -#  else
> -	movl	$__NR_clock_gettime, %eax
> -	syscall
> -#  endif
> -
> -	/* Compute relative timeout.  */
> -	movq	(%r13), %rcx
> -	movq	8(%r13), %rdx
> -	subq	32(%rsp), %rcx
> -	subq	40(%rsp), %rdx
> -# else
> -	leaq	24(%rsp), %rdi
> -	xorl	%esi, %esi
> -	/* This call works because we directly jump to a system call entry
> -	   which preserves all the registers.  */
> -	call	JUMPTARGET(__gettimeofday)
> -
> -	/* Compute relative timeout.  */
> -	movq	40(%rsp), %rax
> -	movl	$1000, %edx
> -	mul	%rdx		/* Milli seconds to nano seconds.  */
> -	movq	(%r13), %rcx
> -	movq	8(%r13), %rdx
> -	subq	32(%rsp), %rcx
> -	subq	%rax, %rdx
> -# endif
> -	jns	12f
> -	addq	$1000000000, %rdx
> -	decq	%rcx
> -12:	testq	%rcx, %rcx
> -	movq	8(%rsp), %rdi
> -	movq	$-ETIMEDOUT, %r14
> -	js	6f
> -
> -	/* Store relative timeout.  */
> -21:	movq	%rcx, 32(%rsp)
> -	movq	%rdx, 40(%rsp)
> -
> -	movl	cond_futex(%rdi), %r12d
> -
> -	/* Unlock.  */
> -	LOCK
> -# if cond_lock == 0
> -	decl	(%rdi)
> -# else
> -	decl	cond_lock(%rdi)
> -# endif
> -	jne	3f
> -
> -.LcleanupSTART2:
> -4:	callq	__pthread_enable_asynccancel
> -	movl	%eax, (%rsp)
> -
> -	leaq	32(%rsp), %r10
> -	LP_OP(cmp) $-1, dep_mutex(%rdi)
> -	movq	%r12, %rdx
> -# ifdef __ASSUME_PRIVATE_FUTEX
> -	movl	$FUTEX_WAIT, %eax
> -	movl	$(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi
> -	cmove	%eax, %esi
> -# else
> -	movl	$0, %eax
> -	movl	%fs:PRIVATE_FUTEX, %esi
> -	cmove	%eax, %esi
> -#  if FUTEX_WAIT != 0
> -	orl	$FUTEX_WAIT, %esi
> -#  endif
> -# endif
> -	addq	$cond_futex, %rdi
> -	movl	$SYS_futex, %eax
> -	syscall
> -	movq	%rax, %r14
> -
> -	movl	(%rsp), %edi
> -	callq	__pthread_disable_asynccancel
> -.LcleanupEND2:
> -
> -	/* Lock.  */
> -	movq	8(%rsp), %rdi
> -	movl	$1, %esi
> -	xorl	%eax, %eax
> -	LOCK
> -# if cond_lock == 0
> -	cmpxchgl %esi, (%rdi)
> -# else
> -	cmpxchgl %esi, cond_lock(%rdi)
> -# endif
> -	jne	5f
> -
> -6:	movl	broadcast_seq(%rdi), %edx
> -
> -	movq	woken_seq(%rdi), %rax
> -
> -	movq	wakeup_seq(%rdi), %r9
> -
> -	cmpl	4(%rsp), %edx
> -	jne	53b
> -
> -	cmpq	24(%rsp), %r9
> -	jbe	15f
> -
> -	cmpq	%rax, %r9
> -	ja	39b
> -
> -15:	cmpq	$-ETIMEDOUT, %r14
> -	jne	8b
> -
> -	jmp	99b
> -
> -	/* Initial locking failed.  */
> -1:
> -# if cond_lock != 0
> -	addq	$cond_lock, %rdi
> -# endif
> -	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
> -	movl	$LLL_PRIVATE, %eax
> -	movl	$LLL_SHARED, %esi
> -	cmovne	%eax, %esi
> -	callq	__lll_lock_wait
> -	jmp	2b
> -
> -	/* Unlock in loop requires wakeup.  */
> -3:
> -# if cond_lock != 0
> -	addq	$cond_lock, %rdi
> -# endif
> -	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
> -	movl	$LLL_PRIVATE, %eax
> -	movl	$LLL_SHARED, %esi
> -	cmovne	%eax, %esi
> -	callq	__lll_unlock_wake
> -	jmp	4b
> -
> -	/* Locking in loop failed.  */
> -5:
> -# if cond_lock != 0
> -	addq	$cond_lock, %rdi
> -# endif
> -	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
> -	movl	$LLL_PRIVATE, %eax
> -	movl	$LLL_SHARED, %esi
> -	cmovne	%eax, %esi
> -	callq	__lll_lock_wait
> -# if cond_lock != 0
> -	subq	$cond_lock, %rdi
> -# endif
> -	jmp	6b
> -#endif
>  	.size	__pthread_cond_timedwait, .-__pthread_cond_timedwait
>  versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
>  		  GLIBC_2_3_2)
> @@ -815,12 +604,6 @@ __condvar_cleanup2:
>  	.uleb128 .LcleanupEND1-.LcleanupSTART1
>  	.uleb128 __condvar_cleanup2-.LSTARTCODE
>  	.uleb128  0
> -#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
> -	.uleb128 .LcleanupSTART2-.LSTARTCODE
> -	.uleb128 .LcleanupEND2-.LcleanupSTART2
> -	.uleb128 __condvar_cleanup2-.LSTARTCODE
> -	.uleb128  0
> -#endif
>  	.uleb128 .LcallUR-.LSTARTCODE
>  	.uleb128 .LENDCODE-.LcallUR
>  	.uleb128 0
> --- a/sysdeps/unix/sysv/linux/x86_64/x32/init-first.c
> +++ b/sysdeps/unix/sysv/linux/x86_64/x32/init-first.c
> @@ -21,9 +21,7 @@
>  # include <libc-vdso.h>
>
>  long int (*__vdso_clock_gettime) (clockid_t, struct timespec *)
> -  __attribute__ ((nocommon));
> -libc_hidden_proto (__vdso_clock_gettime)
> -libc_hidden_data_def (__vdso_clock_gettime)
> +  attribute_hidden;
>
>  static inline void
>  _libc_vdso_platform_setup (void)
>
Roland McGrath Jan. 30, 2015, 12:26 a.m. UTC | #2
> Since you are modifying the code, which are the advantages and constrains to
> x86_64/i386 carry specific assembly implementation for pthread_cond_* 
> function? What prevent them to use the C default code? 

We've discussed before that we'd like to eliminate the assembly.  We have
just been too conservative to do it without careful performance testing to
give us confidence that it won't be a performance regression, and nobody
has done that yet.  I think it might be on Torvald's agenda.  (If not, we
need a volunteer for that!)
Torvald Riegel Jan. 30, 2015, 9:57 a.m. UTC | #3
On Thu, 2015-01-29 at 16:26 -0800, Roland McGrath wrote:
> > Since you are modifying the code, which are the advantages and constrains to
> > x86_64/i386 carry specific assembly implementation for pthread_cond_* 
> > function? What prevent them to use the C default code? 
> 
> We've discussed before that we'd like to eliminate the assembly.  We have
> just been too conservative to do it without careful performance testing to
> give us confidence that it won't be a performance regression, and nobody
> has done that yet.  I think it might be on Torvald's agenda.  (If not, we
> need a volunteer for that!)

IMO, the condvar assembly has to go because we need to change the
synchronization algorithm to correctly implement the stronger ordering
requirements that POSIX and C++ have clarified they want.  I don't think
anybody will volunteer to write an assembly implementation with the new
algorithm.

Wrt performance, I would guess not having assembly is not critical.
Unless a, say, semaphore, there's no real fast-path in a condvar; you
really want to wait until you get a notification after you started
waiting.  So once you do that, there will be cache misses to some extent
because somebody else is waking you up; thus, the communication cost
will matter most, not any per-thread instruction count differences.

I have the new condvar implementation working except failures on
tst-cond25, and except PI bits (but there's not a lot we can do there
anyway compared to the current implementation).  I first thought this
would be a PI-related issue, but now I'd guess it's rather cancellation
related.  I don't yet know the root cause.  But I see the same test fail
on i686 with the old implementation too.
diff mbox

Patch

--- a/sysdeps/unix/sysv/linux/i386/init-first.c
+++ b/sysdeps/unix/sysv/linux/i386/init-first.c
@@ -23,9 +23,7 @@ 
 # include <libc-vdso.h>
 
 long int (*__vdso_clock_gettime) (clockid_t, struct timespec *)
-  __attribute__ ((nocommon));
-libc_hidden_proto (__vdso_clock_gettime)
-libc_hidden_data_def (__vdso_clock_gettime)
+  attribute_hidden;
 
 static long int
 clock_gettime_syscall (clockid_t id, struct timespec *tp)
--- a/sysdeps/unix/sysv/linux/x86/libc-vdso.h
+++ b/sysdeps/unix/sysv/linux/x86/libc-vdso.h
@@ -24,7 +24,8 @@ 
 
 #ifdef SHARED
 
-extern long int (*__vdso_clock_gettime) (clockid_t, struct timespec *);
+extern long int (*__vdso_clock_gettime) (clockid_t, struct timespec *)
+  attribute_hidden;
 
 #endif
 
--- a/sysdeps/unix/sysv/linux/x86_64/Versions
+++ b/sysdeps/unix/sysv/linux/x86_64/Versions
@@ -6,9 +6,6 @@  libc {
 
     modify_ldt;
   }
-  GLIBC_PRIVATE {
-    __vdso_clock_gettime;
-  }
 }
 
 librt {
--- a/sysdeps/unix/sysv/linux/x86_64/init-first.c
+++ b/sysdeps/unix/sysv/linux/x86_64/init-first.c
@@ -23,10 +23,7 @@ 
 # include <libc-vdso.h>
 
 long int (*__vdso_clock_gettime) (clockid_t, struct timespec *)
-  __attribute__ ((nocommon));
-libc_hidden_proto (__vdso_clock_gettime)
-libc_hidden_data_def (__vdso_clock_gettime)
-
+  attribute_hidden;
 long int (*__vdso_getcpu) (unsigned *, unsigned *, void *) attribute_hidden;
 
 extern long int __syscall_clock_gettime (clockid_t, struct timespec *);
--- a/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -59,11 +59,7 @@  __pthread_cond_timedwait:
 	pushq	%r15
 	cfi_adjust_cfa_offset(8)
 	cfi_rel_offset(%r15, 0)
-#ifdef __ASSUME_FUTEX_CLOCK_REALTIME
-# define FRAME_SIZE (32+8)
-#else
-# define FRAME_SIZE (48+8)
-#endif
+#define FRAME_SIZE (32+8)
 	subq	$FRAME_SIZE, %rsp
 	cfi_adjust_cfa_offset(FRAME_SIZE)
 	cfi_remember_state
@@ -105,15 +101,6 @@  __pthread_cond_timedwait:
 22:
 	xorb	%r15b, %r15b
 
-#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
-#  ifdef PIC
-	cmpl	$0, __have_futex_clock_realtime(%rip)
-#  else
-	cmpl	$0, __have_futex_clock_realtime
-#  endif
-	je	.Lreltmo
-#endif
-
 	/* Get internal lock.  */
 	movl	$1, %esi
 	xorl	%eax, %eax
@@ -440,204 +427,6 @@  __pthread_cond_timedwait:
 47:	movq	(%rsp), %rax
 	jmp	48b
 
-
-#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
-.Lreltmo:
-	/* Get internal lock.  */
-	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-# if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-# else
-	cmpxchgl %esi, cond_lock(%rdi)
-# endif
-	jnz	1f
-
-	/* Unlock the mutex.  */
-2:	movq	16(%rsp), %rdi
-	xorl	%esi, %esi
-	callq	__pthread_mutex_unlock_usercnt
-
-	testl	%eax, %eax
-	jne	46b
-
-	movq	8(%rsp), %rdi
-	incq	total_seq(%rdi)
-	incl	cond_futex(%rdi)
-	addl	$(1 << nwaiters_shift), cond_nwaiters(%rdi)
-
-	/* Get and store current wakeup_seq value.  */
-	movq	8(%rsp), %rdi
-	movq	wakeup_seq(%rdi), %r9
-	movl	broadcast_seq(%rdi), %edx
-	movq	%r9, 24(%rsp)
-	movl	%edx, 4(%rsp)
-
-	/* Get the current time.  */
-8:
-# ifdef __NR_clock_gettime
-	/* Get the clock number.  Note that the field in the condvar
-	   structure stores the number minus 1.  */
-	movq	8(%rsp), %rdi
-	movl	cond_nwaiters(%rdi), %edi
-	andl	$((1 << nwaiters_shift) - 1), %edi
-	/* Only clocks 0 and 1 are allowed so far.  Both are handled in the
-	   kernel.  */
-	leaq	32(%rsp), %rsi
-#  ifdef SHARED
-	mov	__vdso_clock_gettime@GOTPCREL(%rip), %RAX_LP
-	mov	(%rax), %RAX_LP
-	PTR_DEMANGLE (%RAX_LP)
-	call	*%rax
-#  else
-	movl	$__NR_clock_gettime, %eax
-	syscall
-#  endif
-
-	/* Compute relative timeout.  */
-	movq	(%r13), %rcx
-	movq	8(%r13), %rdx
-	subq	32(%rsp), %rcx
-	subq	40(%rsp), %rdx
-# else
-	leaq	24(%rsp), %rdi
-	xorl	%esi, %esi
-	/* This call works because we directly jump to a system call entry
-	   which preserves all the registers.  */
-	call	JUMPTARGET(__gettimeofday)
-
-	/* Compute relative timeout.  */
-	movq	40(%rsp), %rax
-	movl	$1000, %edx
-	mul	%rdx		/* Milli seconds to nano seconds.  */
-	movq	(%r13), %rcx
-	movq	8(%r13), %rdx
-	subq	32(%rsp), %rcx
-	subq	%rax, %rdx
-# endif
-	jns	12f
-	addq	$1000000000, %rdx
-	decq	%rcx
-12:	testq	%rcx, %rcx
-	movq	8(%rsp), %rdi
-	movq	$-ETIMEDOUT, %r14
-	js	6f
-
-	/* Store relative timeout.  */
-21:	movq	%rcx, 32(%rsp)
-	movq	%rdx, 40(%rsp)
-
-	movl	cond_futex(%rdi), %r12d
-
-	/* Unlock.  */
-	LOCK
-# if cond_lock == 0
-	decl	(%rdi)
-# else
-	decl	cond_lock(%rdi)
-# endif
-	jne	3f
-
-.LcleanupSTART2:
-4:	callq	__pthread_enable_asynccancel
-	movl	%eax, (%rsp)
-
-	leaq	32(%rsp), %r10
-	LP_OP(cmp) $-1, dep_mutex(%rdi)
-	movq	%r12, %rdx
-# ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAIT, %eax
-	movl	$(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi
-	cmove	%eax, %esi
-# else
-	movl	$0, %eax
-	movl	%fs:PRIVATE_FUTEX, %esi
-	cmove	%eax, %esi
-#  if FUTEX_WAIT != 0
-	orl	$FUTEX_WAIT, %esi
-#  endif
-# endif
-	addq	$cond_futex, %rdi
-	movl	$SYS_futex, %eax
-	syscall
-	movq	%rax, %r14
-
-	movl	(%rsp), %edi
-	callq	__pthread_disable_asynccancel
-.LcleanupEND2:
-
-	/* Lock.  */
-	movq	8(%rsp), %rdi
-	movl	$1, %esi
-	xorl	%eax, %eax
-	LOCK
-# if cond_lock == 0
-	cmpxchgl %esi, (%rdi)
-# else
-	cmpxchgl %esi, cond_lock(%rdi)
-# endif
-	jne	5f
-
-6:	movl	broadcast_seq(%rdi), %edx
-
-	movq	woken_seq(%rdi), %rax
-
-	movq	wakeup_seq(%rdi), %r9
-
-	cmpl	4(%rsp), %edx
-	jne	53b
-
-	cmpq	24(%rsp), %r9
-	jbe	15f
-
-	cmpq	%rax, %r9
-	ja	39b
-
-15:	cmpq	$-ETIMEDOUT, %r14
-	jne	8b
-
-	jmp	99b
-
-	/* Initial locking failed.  */
-1:
-# if cond_lock != 0
-	addq	$cond_lock, %rdi
-# endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-	jmp	2b
-
-	/* Unlock in loop requires wakeup.  */
-3:
-# if cond_lock != 0
-	addq	$cond_lock, %rdi
-# endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_unlock_wake
-	jmp	4b
-
-	/* Locking in loop failed.  */
-5:
-# if cond_lock != 0
-	addq	$cond_lock, %rdi
-# endif
-	LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi)
-	movl	$LLL_PRIVATE, %eax
-	movl	$LLL_SHARED, %esi
-	cmovne	%eax, %esi
-	callq	__lll_lock_wait
-# if cond_lock != 0
-	subq	$cond_lock, %rdi
-# endif
-	jmp	6b
-#endif
 	.size	__pthread_cond_timedwait, .-__pthread_cond_timedwait
 versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait,
 		  GLIBC_2_3_2)
@@ -815,12 +604,6 @@  __condvar_cleanup2:
 	.uleb128 .LcleanupEND1-.LcleanupSTART1
 	.uleb128 __condvar_cleanup2-.LSTARTCODE
 	.uleb128  0
-#ifndef __ASSUME_FUTEX_CLOCK_REALTIME
-	.uleb128 .LcleanupSTART2-.LSTARTCODE
-	.uleb128 .LcleanupEND2-.LcleanupSTART2
-	.uleb128 __condvar_cleanup2-.LSTARTCODE
-	.uleb128  0
-#endif
 	.uleb128 .LcallUR-.LSTARTCODE
 	.uleb128 .LENDCODE-.LcallUR
 	.uleb128 0
--- a/sysdeps/unix/sysv/linux/x86_64/x32/init-first.c
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/init-first.c
@@ -21,9 +21,7 @@ 
 # include <libc-vdso.h>
 
 long int (*__vdso_clock_gettime) (clockid_t, struct timespec *)
-  __attribute__ ((nocommon));
-libc_hidden_proto (__vdso_clock_gettime)
-libc_hidden_data_def (__vdso_clock_gettime)
+  attribute_hidden;
 
 static inline void
 _libc_vdso_platform_setup (void)