diff mbox series

powerpc: Move cache line size to rtld_global_ro

Message ID 20191223184523.70113-1-tuliom@linux.ibm.com
State New
Headers show
Series powerpc: Move cache line size to rtld_global_ro | expand

Commit Message

Tulio Magno Quites Machado Filho Dec. 23, 2019, 6:45 p.m. UTC
GCC 10.0 enabled -fno-common by default and this started to point that
__cache_line_size had been implemented in 2 different places: loader and
libc.

In order to avoid this duplication, the libc variable has been removed
and the loader variable is moved to rtld_global_ro.

File sysdeps/unix/sysv/linux/powerpc/dl-auxv.h has been added in order
to reuse code for both static and dynamic linking scenarios.

Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
---
 sysdeps/powerpc/dl-procinfo.c                | 17 ++++++++++++
 sysdeps/powerpc/powerpc32/a2/memcpy.S        | 23 ++++++++--------
 sysdeps/powerpc/powerpc32/dl-machine.c       | 11 ++------
 sysdeps/powerpc/powerpc32/memset.S           | 29 +++++++++-----------
 sysdeps/powerpc/powerpc32/sysdep.h           | 26 ++++++++++++++++++
 sysdeps/powerpc/powerpc64/a2/memcpy.S        | 21 +++++++++++---
 sysdeps/powerpc/powerpc64/memset.S           | 24 ++++++++++++++--
 sysdeps/powerpc/rtld-global-offsets.sym      |  1 +
 sysdeps/unix/sysv/linux/powerpc/dl-auxv.h    | 24 ++++++++++++++++
 sysdeps/unix/sysv/linux/powerpc/dl-support.c | 23 ++++++++++++++++
 sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c  | 12 +-------
 sysdeps/unix/sysv/linux/powerpc/libc-start.c | 10 ++-----
 12 files changed, 160 insertions(+), 61 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
 create mode 100644 sysdeps/unix/sysv/linux/powerpc/dl-support.c

Comments

Adhemerval Zanella Netto Dec. 26, 2019, 5:04 p.m. UTC | #1
On 23/12/2019 15:45, Tulio Magno Quites Machado Filho wrote:
> GCC 10.0 enabled -fno-common by default and this started to point that
> __cache_line_size had been implemented in 2 different places: loader and
> libc.
> 
> In order to avoid this duplication, the libc variable has been removed
> and the loader variable is moved to rtld_global_ro.
> 
> File sysdeps/unix/sysv/linux/powerpc/dl-auxv.h has been added in order
> to reuse code for both static and dynamic linking scenarios.
> 
> Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>

We do not use signed-off.  

Although not strickly necessary since the memset.c implementation already
check for non set _dl_cache_line_size, you may also add the cache-line
initialization for the static dlopen on dl-static.c (as powerpc
already does for dl_pagesize).

I assume you have also checked on powerpc-linux-gnu and powerpc64-linux-gnu,
correct?

> ---
>  sysdeps/powerpc/dl-procinfo.c                | 17 ++++++++++++
>  sysdeps/powerpc/powerpc32/a2/memcpy.S        | 23 ++++++++--------
>  sysdeps/powerpc/powerpc32/dl-machine.c       | 11 ++------
>  sysdeps/powerpc/powerpc32/memset.S           | 29 +++++++++-----------
>  sysdeps/powerpc/powerpc32/sysdep.h           | 26 ++++++++++++++++++
>  sysdeps/powerpc/powerpc64/a2/memcpy.S        | 21 +++++++++++---
>  sysdeps/powerpc/powerpc64/memset.S           | 24 ++++++++++++++--
>  sysdeps/powerpc/rtld-global-offsets.sym      |  1 +
>  sysdeps/unix/sysv/linux/powerpc/dl-auxv.h    | 24 ++++++++++++++++
>  sysdeps/unix/sysv/linux/powerpc/dl-support.c | 23 ++++++++++++++++
>  sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c  | 12 +-------
>  sysdeps/unix/sysv/linux/powerpc/libc-start.c | 10 ++-----
>  12 files changed, 160 insertions(+), 61 deletions(-)
>  create mode 100644 sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
>  create mode 100644 sysdeps/unix/sysv/linux/powerpc/dl-support.c
> 
> diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
> index 20706a648a..94b33d0c16 100644
> --- a/sysdeps/powerpc/dl-procinfo.c
> +++ b/sysdeps/powerpc/dl-procinfo.c
> @@ -89,5 +89,22 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15]
>  ,
>  #endif
>  
> +#if !IS_IN (ldconfig)
> +# if !defined PROCINFO_DECL && defined SHARED
> +     ._dl_cache_line_size
> +# else
> +PROCINFO_CLASS int _dl_cache_line_size
> +# endif
> +# ifndef PROCINFO_DECL
> +     = 0
> +# endif
> +# if !defined SHARED || defined PROCINFO_DECL
> +;
> +# else
> +,
> +# endif
> +#endif
> +
> +
>  #undef PROCINFO_DECL
>  #undef PROCINFO_CLASS

Ok.

> diff --git a/sysdeps/powerpc/powerpc32/a2/memcpy.S b/sysdeps/powerpc/powerpc32/a2/memcpy.S
> index 9bc91a8df1..ecfea5c832 100644
> --- a/sysdeps/powerpc/powerpc32/a2/memcpy.S
> +++ b/sysdeps/powerpc/powerpc32/a2/memcpy.S
> @@ -18,6 +18,7 @@
>     <https://www.gnu.org/licenses/>.  */
>  
>  #include <sysdep.h>
> +#include <rtld-global-offsets.h>
>  
>  #define PREFETCH_AHEAD 4        /* no cache lines SRC prefetching ahead  */
>  #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
> @@ -106,25 +107,23 @@ EALIGN (memcpy, 5, 0)
>  L(dst_aligned):
>  
>  
> -#ifdef SHARED
> +#ifdef PIC
>  	mflr    r0
> -/* Establishes GOT addressability so we can load __cache_line_size
> -   from static. This value was set from the aux vector during startup.  */
> +/* Establishes GOT addressability so we can load the cache line size
> +   from rtld_global_ro. This value was set from the aux vector during
> +   startup.  */

My understanding is code guidelines states two spaces after the end of a 
sentence in comments.

>  	SETUP_GOT_ACCESS(r9,got_label)
> -	addis   r9,r9,__cache_line_size-got_label@ha
> -	lwz     r9,__cache_line_size-got_label@l(r9)
> -	mtlr    r0
> -#else
> -/* Load __cache_line_size from static. This value was set from the
> -   aux vector during startup.  */
> -	lis     r9,__cache_line_size@ha
> -	lwz     r9,__cache_line_size@l(r9)
> +	addis	r9,r9,_GLOBAL_OFFSET_TABLE_-got_label@ha
> +	addi	r9,r9,_GLOBAL_OFFSET_TABLE_-got_label@l
> +	mtlr	r0
>  #endif
> +	__GLRO(r9, r9, _dl_cache_line_size,
> +	       RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)
>  
>  	cmplwi  cr5, r9, 0
>  	bne+    cr5,L(cachelineset)
>  
> -/* __cache_line_size not set: generic byte copy without much optimization */
> +/* Cache line size not set: generic byte copy without much optimization */
>  	andi.	r0,r5,1		/* If length is odd copy one byte.  */
>  	beq	L(cachelinenotset_align)
>  	lbz	r7,0(r4)	/* Read one byte from source.  */

Ok.

> diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c
> index a90cbc1ae3..740ff8f2ec 100644
> --- a/sysdeps/powerpc/powerpc32/dl-machine.c
> +++ b/sysdeps/powerpc/powerpc32/dl-machine.c
> @@ -25,11 +25,6 @@
>  #include <dl-machine.h>
>  #include <_itoa.h>
>  
> -/* The value __cache_line_size is defined in dl-sysdep.c and is initialised
> -   by _dl_sysdep_start via DL_PLATFORM_INIT.  */
> -extern int __cache_line_size attribute_hidden;
> -
> -
>  /* Stuff for the PLT.  */
>  #define PLT_INITIAL_ENTRY_WORDS 18
>  #define PLT_LONGBRANCH_ENTRY_WORDS 0
> @@ -309,14 +304,14 @@ __elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
>  
>  	 Assumes that dcbst and icbi apply to lines of 16 bytes or
>  	 more.  Current known line sizes are 16, 32, and 128 bytes.
> -	 The following gets the __cache_line_size, when available.  */
> +	 The following gets the cache line size, when available.  */
>  
>        /* Default minimum 4 words per cache line.  */
>        int line_size_words = 4;
>  
> -      if (lazy && __cache_line_size != 0)
> +      if (lazy && GLRO(dl_cache_line_size) != 0)
>  	/* Convert bytes to words.  */
> -	line_size_words = __cache_line_size / 4;
> +	line_size_words = GLRO(dl_cache_line_size) / 4;
>  
>        size_modified = lazy ? rel_offset_words : 6;
>        for (i = 0; i < size_modified; i += line_size_words)

Ok.

> diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
> index fc8b1a2547..ea96913e4e 100644
> --- a/sysdeps/powerpc/powerpc32/memset.S
> +++ b/sysdeps/powerpc/powerpc32/memset.S
> @@ -17,12 +17,13 @@
>     <https://www.gnu.org/licenses/>.  */
>  
>  #include <sysdep.h>
> +#include <rtld-global-offsets.h>
>  
>  /* void * [r3] memset (void *s [r3], int c [r4], size_t n [r5]));
>     Returns 's'.
>  
>     The memset is done in four sizes: byte (8 bits), word (32 bits),
> -   32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
> +   32-byte blocks (256 bits) and cache line size (128, 256, 1024 bits).
>     There is a special case for setting whole cache lines to 0, which
>     takes advantage of the dcbz instruction.  */
>  
> @@ -95,7 +96,7 @@ L(caligned):
>  
>  /* Check if we can use the special case for clearing memory using dcbz.
>     This requires that we know the correct cache line size for this
> -   processor.  Getting the __cache_line_size may require establishing GOT
> +   processor.  Getting the cache line size may require establishing GOT
>     addressability, so branch out of line to set this up.  */
>  	beq	cr1, L(checklinesize)
>  

Ok.

> @@ -230,26 +231,22 @@ L(medium_28t):
>  	blr
>  
>  L(checklinesize):
> -#ifdef SHARED
> -	mflr	rTMP
>  /* If the remaining length is less the 32 bytes then don't bother getting
>     the cache line size.  */
>  	beq	L(medium)
> -/* Establishes GOT addressability so we can load __cache_line_size
> -   from static. This value was set from the aux vector during startup.  */
> +#ifdef PIC
> +	mflr	rTMP
> +/* Establishes GOT addressability so we can load the cache line size
> +   from rtld_global_ro. This value was set from the aux vector during
> +   startup.  */
>  	SETUP_GOT_ACCESS(rGOT,got_label)
> -	addis	rGOT,rGOT,__cache_line_size-got_label@ha
> -	lwz	rCLS,__cache_line_size-got_label@l(rGOT)
> +	addis	rGOT,rGOT,_GLOBAL_OFFSET_TABLE_-got_label@ha
> +	addi	rGOT,rGOT,_GLOBAL_OFFSET_TABLE_-got_label@l
>  	mtlr	rTMP
> -#else
> -/* Load __cache_line_size from static. This value was set from the
> -   aux vector during startup.  */
> -	lis	rCLS,__cache_line_size@ha
> -/* If the remaining length is less the 32 bytes then don't bother getting
> -   the cache line size.  */
> -	beq	L(medium)
> -	lwz	rCLS,__cache_line_size@l(rCLS)
>  #endif
> +/* Load rtld_global_ro._dl_cache_line_size.  */
> +	__GLRO(rCLS, rGOT, _dl_cache_line_size,
> +	       RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)
>  
>  /* If the cache line size was not set then goto to L(nondcbz), which is
>     safe for any cache line size.  */

Ok.

> diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h
> index c21ea87f5a..4605276ebf 100644
> --- a/sysdeps/powerpc/powerpc32/sysdep.h
> +++ b/sysdeps/powerpc/powerpc32/sysdep.h
> @@ -157,4 +157,30 @@ GOT_LABEL:			;					      \
>  /* Label in text section.  */
>  #define C_TEXT(name) name
>  
> +/* Read the value of member from rtld_global_ro.  */
> +#ifdef PIC
> +# ifdef SHARED
> +#  if IS_IN (rtld)
> +/* Inside ld.so we use the local alias to avoid runtime GOT
> +   relocations.  */
> +#   define __GLRO(rOUT, rGOT, member, offset)				\
> +	lwz     rOUT,_rtld_local_ro@got(rGOT);				\
> +	lwz     rOUT,offset(rOUT)
> +#  else
> +#   define __GLRO(rOUT, rGOT, member, offset)				\
> +	lwz     rOUT,_rtld_global_ro@got(rGOT);				\
> +	lwz     rOUT,offset(rOUT)
> +#  endif
> +# else
> +#  define __GLRO(rOUT, rGOT, member, offset)				\
> +	lwz     rOUT,member@got(rGOT);					\
> +	lwz     rOUT,0(rOUT)
> +# endif
> +#else
> +/* Position-dependent code does not require access to the GOT.  */
> +# define __GLRO(rOUT, rGOT, member, offset)				\
> +	lis     rOUT,(member+LOWORD)@ha					\
> +	lwz     rOUT,(member+LOWORD)@l(rOUT)
> +#endif	/* PIC */
> +
>  #endif	/* __ASSEMBLER__ */

Ok.

> diff --git a/sysdeps/powerpc/powerpc64/a2/memcpy.S b/sysdeps/powerpc/powerpc64/a2/memcpy.S
> index 6d5c5afddb..e515255126 100644
> --- a/sysdeps/powerpc/powerpc64/a2/memcpy.S
> +++ b/sysdeps/powerpc/powerpc64/a2/memcpy.S
> @@ -18,6 +18,7 @@
>     <https://www.gnu.org/licenses/>.  */
>  
>  #include <sysdep.h>
> +#include <rtld-global-offsets.h>
>  
>  #ifndef MEMCPY
>  # define MEMCPY memcpy
> @@ -27,8 +28,19 @@
>  #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
>  
>  	.section        ".toc","aw"
> -.LC0:
> -	.tc __cache_line_size[TC],__cache_line_size
> +.LC__dl_cache_line_size:
> +#ifdef SHARED
> +# if IS_IN (rtld)
> +	/* Inside ld.so we use the local alias to avoid runtime GOT
> +	   relocations.  */
> +	.tc _rtld_local_ro[TC],_rtld_local_ro
> +# else
> +	.tc _rtld_global_ro[TC],_rtld_global_ro
> +# endif
> +#else
> +	.tc _dl_cache_line_size[TC],_dl_cache_line_size
> +#endif
> +
>  	.section        ".text"
>  	.align 2
>  

Ok. Maybe add a similar macro for powerpc64 as you did for powerpc32 with
__GLRO ?

> @@ -55,7 +67,8 @@ ENTRY (MEMCPY, 5)
>  	*/
>  
>  	neg     r8,r3           /* LS 4 bits = # bytes to 8-byte dest bdry  */
> -	ld      r9,.LC0@toc(r2) /* Get cache line size (part 1) */
> +	/* Get cache line size (part 1) */
> +	ld      r9,.LC__dl_cache_line_size@toc(r2)
>  	clrldi  r8,r8,64-4      /* align to 16byte boundary  */
>  	sub     r7,r4,r3        /* compute offset to src from dest */
>  	lwz     r9,0(r9)        /* Get cache line size (part 2) */

Ok.

> @@ -121,7 +134,7 @@ L(dst_aligned):
>  	cmpdi	cr0,r9,0	/* Cache line size set? */
>  	bne+	cr0,L(cachelineset)
>  
> -/* __cache_line_size not set: generic byte copy without much optimization */
> +/* Cache line size not set: generic byte copy without much optimization */
>  	clrldi.	r0,r5,63	/* If length is odd copy one byte */
>  	beq	L(cachelinenotset_align)
>  	lbz	r7,0(r4)	/* Read one byte from source */

Ok.

> diff --git a/sysdeps/powerpc/powerpc64/memset.S b/sysdeps/powerpc/powerpc64/memset.S
> index 5d96696e87..d49ef102d4 100644
> --- a/sysdeps/powerpc/powerpc64/memset.S
> +++ b/sysdeps/powerpc/powerpc64/memset.S
> @@ -17,10 +17,22 @@
>     <https://www.gnu.org/licenses/>.  */
>  
>  #include <sysdep.h>
> +#include <rtld-global-offsets.h>
>  
>  	.section	".toc","aw"
> -.LC0:
> -	.tc __cache_line_size[TC],__cache_line_size
> +.LC__dl_cache_line_size:
> +#ifdef SHARED
> +# if IS_IN (rtld)
> +	/* Inside ld.so we use the local alias to avoid runtime GOT
> +	   relocations.  */
> +	.tc _rtld_local_ro[TC],_rtld_local_ro
> +# else
> +	.tc _rtld_global_ro[TC],_rtld_global_ro
> +# endif
> +#else
> +	.tc _dl_cache_line_size[TC],_dl_cache_line_size
> +#endif
> +
>  	.section	".text"
>  	.align 2
>  

Ok.

> @@ -146,8 +158,14 @@ L(zloopstart):
>  /* If the remaining length is less the 32 bytes, don't bother getting
>  	 the cache line size.  */
>  	beq	L(medium)
> -	ld	rCLS,.LC0@toc(r2)
> +	ld	rCLS,.LC__dl_cache_line_size@toc(r2)
> +# ifdef SHARED
> +	/* Load _rtld_global_ro._dl_cache_line_size.  */
> +	lwz	rCLS,RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET(rCLS)
> +# else
> +	/* Load _dl_cache_line_size.  */
>  	lwz	rCLS,0(rCLS)
> +# endif
>  /* If the cache line size was not set just goto to L(nondcbz) which is
>  	 safe for any cache line size.  */
>  	cmpldi	cr1,rCLS,0

Ok.

> diff --git a/sysdeps/powerpc/rtld-global-offsets.sym b/sysdeps/powerpc/rtld-global-offsets.sym
> index f5ea5a1466..6b348fd522 100644
> --- a/sysdeps/powerpc/rtld-global-offsets.sym
> +++ b/sysdeps/powerpc/rtld-global-offsets.sym
> @@ -6,3 +6,4 @@
>  
>  RTLD_GLOBAL_RO_DL_HWCAP_OFFSET	rtld_global_ro_offsetof (_dl_hwcap)
>  RTLD_GLOBAL_RO_DL_HWCAP2_OFFSET	rtld_global_ro_offsetof (_dl_hwcap2)
> +RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET	rtld_global_ro_offsetof (_dl_cache_line_size)

Ok.

> diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
> new file mode 100644
> index 0000000000..44bea09974
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
> @@ -0,0 +1,24 @@
> +/* Auxiliary vector processing.  Linux/PPC version.
> +   Copyright (C) 2019 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* Scan the Aux Vector for the "Data Cache Block Size" entry and assign it
> +   to dl_cache_line_size.  */
> +#define DL_PLATFORM_AUXV						      \
> +      case AT_DCACHEBSIZE:						      \
> +	GLRO(dl_cache_line_size) = av->a_un.a_val;			      \
> +	break;

Ok.

> diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-support.c b/sysdeps/unix/sysv/linux/powerpc/dl-support.c
> new file mode 100644
> index 0000000000..2a792d7092
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/powerpc/dl-support.c
> @@ -0,0 +1,23 @@
> +/* Support for dynamic linking code in static libc.  Linux/PPC version.
> +   Copyright (C) 2019 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include "dl-auxv.h"
> +#include <ldsodefs.h>
> +int GLRO(dl_cache_line_size);
> +
> +#include <elf/dl-support.c>

Why is it required? It sould be already covered by inclusion of
sysdeps/powerpc/dl-procinfo.c and it does not required the dl-auxv.h
definitions here.

> diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c b/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
> index fa19cc66c0..6d51d6061e 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
> +++ b/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
> @@ -18,16 +18,6 @@
>  
>  #include <config.h>
>  #include <ldsodefs.h>
> -
> -int __cache_line_size attribute_hidden;
> -
> -/* Scan the Aux Vector for the "Data Cache Block Size" entry.  If found
> -   verify that the static extern __cache_line_size is defined by checking
> -   for not NULL.  If it is defined then assign the cache block size
> -   value to __cache_line_size.  */
> -#define DL_PLATFORM_AUXV						      \
> -      case AT_DCACHEBSIZE:						      \
> -	__cache_line_size = av->a_un.a_val;				      \
> -	break;
> +#include <dl-auxv.h>
>  
>  #include <sysdeps/unix/sysv/linux/dl-sysdep.c>

This is essentially an empty file that just include dl-auxv.h. I think
it would be better to just create a generic empty dl-auxv.h, include it
on default dl-sysdep.c, and remove this file.

> diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
> index a659a9144f..3a5eb0e952 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
> +++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
> @@ -24,7 +24,6 @@
>  #include <hwcapinfo.h>
>  #endif
>  
> -int __cache_line_size attribute_hidden;
>  /* The main work is done in the generic function.  */
>  #define LIBC_START_MAIN generic_start_main
>  #define LIBC_START_DISABLE_INLINE
> @@ -71,15 +70,12 @@ __libc_start_main (int argc, char **argv,
>        rtld_fini = NULL;
>      }
>  
> -  /* Initialize the __cache_line_size variable from the aux vector.  For the
> -     static case, we also need _dl_hwcap, _dl_hwcap2 and _dl_platform, so we
> -     can call __tcb_parse_hwcap_and_convert_at_platform ().  */
>    for (ElfW (auxv_t) * av = auxvec; av->a_type != AT_NULL; ++av)
>      switch (av->a_type)
>        {
> -      case AT_DCACHEBSIZE:
> -	__cache_line_size = av->a_un.a_val;
> -	break;
> +      /* For the static case, we also need _dl_hwcap, _dl_hwcap2 and
> +         _dl_platform, so we can call
> +         __tcb_parse_hwcap_and_convert_at_platform ().  */
>  #ifndef SHARED
>        case AT_HWCAP:
>  	_dl_hwcap = (unsigned long int) av->a_un.a_val;
> 

Ok.
Tulio Magno Quites Machado Filho Dec. 27, 2019, 3:42 p.m. UTC | #2
Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:

> On 23/12/2019 15:45, Tulio Magno Quites Machado Filho wrote:
>> GCC 10.0 enabled -fno-common by default and this started to point that
>> __cache_line_size had been implemented in 2 different places: loader and
>> libc.
>> 
>> In order to avoid this duplication, the libc variable has been removed
>> and the loader variable is moved to rtld_global_ro.
>> 
>> File sysdeps/unix/sysv/linux/powerpc/dl-auxv.h has been added in order
>> to reuse code for both static and dynamic linking scenarios.
>> 
>> Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
>
> We do not use signed-off.  

Ack.

> Although not strickly necessary since the memset.c implementation already
> check for non set _dl_cache_line_size, you may also add the cache-line
> initialization for the static dlopen on dl-static.c (as powerpc
> already does for dl_pagesize).

Good point.  That would allow a dlopened DSO from a statically linked
executable access dl_cache_line_size too.

> I assume you have also checked on powerpc-linux-gnu and powerpc64-linux-gnu,
> correct?

Correct.

>> diff --git a/sysdeps/powerpc/powerpc32/a2/memcpy.S b/sysdeps/powerpc/powerpc32/a2/memcpy.S
>> index 9bc91a8df1..ecfea5c832 100644
>> --- a/sysdeps/powerpc/powerpc32/a2/memcpy.S
>> +++ b/sysdeps/powerpc/powerpc32/a2/memcpy.S
>> @@ -18,6 +18,7 @@
>>     <https://www.gnu.org/licenses/>.  */
>>  
>>  #include <sysdep.h>
>> +#include <rtld-global-offsets.h>
>>  
>>  #define PREFETCH_AHEAD 4        /* no cache lines SRC prefetching ahead  */
>>  #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
>> @@ -106,25 +107,23 @@ EALIGN (memcpy, 5, 0)
>>  L(dst_aligned):
>>  
>>  
>> -#ifdef SHARED
>> +#ifdef PIC
>>  	mflr    r0
>> -/* Establishes GOT addressability so we can load __cache_line_size
>> -   from static. This value was set from the aux vector during startup.  */
>> +/* Establishes GOT addressability so we can load the cache line size
>> +   from rtld_global_ro. This value was set from the aux vector during
>> +   startup.  */
>
> My understanding is code guidelines states two spaces after the end of a 
> sentence in comments.

Indeed.

>> diff --git a/sysdeps/powerpc/powerpc64/a2/memcpy.S b/sysdeps/powerpc/powerpc64/a2/memcpy.S
>> index 6d5c5afddb..e515255126 100644
>> --- a/sysdeps/powerpc/powerpc64/a2/memcpy.S
>> +++ b/sysdeps/powerpc/powerpc64/a2/memcpy.S
>> @@ -18,6 +18,7 @@
>>     <https://www.gnu.org/licenses/>.  */
>>  
>>  #include <sysdep.h>
>> +#include <rtld-global-offsets.h>
>>  
>>  #ifndef MEMCPY
>>  # define MEMCPY memcpy
>> @@ -27,8 +28,19 @@
>>  #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
>>  
>>  	.section        ".toc","aw"
>> -.LC0:
>> -	.tc __cache_line_size[TC],__cache_line_size
>> +.LC__dl_cache_line_size:
>> +#ifdef SHARED
>> +# if IS_IN (rtld)
>> +	/* Inside ld.so we use the local alias to avoid runtime GOT
>> +	   relocations.  */
>> +	.tc _rtld_local_ro[TC],_rtld_local_ro
>> +# else
>> +	.tc _rtld_global_ro[TC],_rtld_global_ro
>> +# endif
>> +#else
>> +	.tc _dl_cache_line_size[TC],_dl_cache_line_size
>> +#endif
>> +
>>  	.section        ".text"
>>  	.align 2
>>  
>
> Ok. Maybe add a similar macro for powerpc64 as you did for powerpc32 with
> __GLRO ?

Are you suggesting to create a __GLRO macro that would define this label?

>> diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-support.c b/sysdeps/unix/sysv/linux/powerpc/dl-support.c
>> new file mode 100644
>> index 0000000000..2a792d7092
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/powerpc/dl-support.c
>> @@ -0,0 +1,23 @@
>> +/* Support for dynamic linking code in static libc.  Linux/PPC version.
>> +   Copyright (C) 2019 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +#include "dl-auxv.h"
>> +#include <ldsodefs.h>
>> +int GLRO(dl_cache_line_size);
>> +
>> +#include <elf/dl-support.c>
>
> Why is it required? It sould be already covered by inclusion of
> sysdeps/powerpc/dl-procinfo.c and it does not required the dl-auxv.h
> definitions here.

This is the code that initializes it statically.  It avoids duplicating code.
dl-procinfo.c does not copy AT_DCACHEBSIZE to GLRO(dl_cache_line_size).

>> diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c b/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
>> index fa19cc66c0..6d51d6061e 100644
>> --- a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
>> +++ b/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
>> @@ -18,16 +18,6 @@
>>  
>>  #include <config.h>
>>  #include <ldsodefs.h>
>> -
>> -int __cache_line_size attribute_hidden;
>> -
>> -/* Scan the Aux Vector for the "Data Cache Block Size" entry.  If found
>> -   verify that the static extern __cache_line_size is defined by checking
>> -   for not NULL.  If it is defined then assign the cache block size
>> -   value to __cache_line_size.  */
>> -#define DL_PLATFORM_AUXV						      \
>> -      case AT_DCACHEBSIZE:						      \
>> -	__cache_line_size = av->a_un.a_val;				      \
>> -	break;
>> +#include <dl-auxv.h>
>>  
>>  #include <sysdeps/unix/sysv/linux/dl-sysdep.c>
>
> This is essentially an empty file that just include dl-auxv.h. I think
> it would be better to just create a generic empty dl-auxv.h, include it
> on default dl-sysdep.c, and remove this file.

I'm fine with this proposal, but I don't think it's going to help neither alpha
or x86 which have slightly different implementations.
Adhemerval Zanella Netto Dec. 27, 2019, 4:47 p.m. UTC | #3
On 27/12/2019 12:42, Tulio Magno Quites Machado Filho wrote:
> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
> 
>> On 23/12/2019 15:45, Tulio Magno Quites Machado Filho wrote:
>>> GCC 10.0 enabled -fno-common by default and this started to point that
>>> __cache_line_size had been implemented in 2 different places: loader and
>>> libc.
>>>
>>> In order to avoid this duplication, the libc variable has been removed
>>> and the loader variable is moved to rtld_global_ro.
>>>
>>> File sysdeps/unix/sysv/linux/powerpc/dl-auxv.h has been added in order
>>> to reuse code for both static and dynamic linking scenarios.
>>>
>>> Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
>>
>> We do not use signed-off.  
> 
> Ack.
> 
>> Although not strickly necessary since the memset.c implementation already
>> check for non set _dl_cache_line_size, you may also add the cache-line
>> initialization for the static dlopen on dl-static.c (as powerpc
>> already does for dl_pagesize).
> 
> Good point.  That would allow a dlopened DSO from a statically linked
> executable access dl_cache_line_size too.
> 
>> I assume you have also checked on powerpc-linux-gnu and powerpc64-linux-gnu,
>> correct?
> 
> Correct.
> 
>>> diff --git a/sysdeps/powerpc/powerpc32/a2/memcpy.S b/sysdeps/powerpc/powerpc32/a2/memcpy.S
>>> index 9bc91a8df1..ecfea5c832 100644
>>> --- a/sysdeps/powerpc/powerpc32/a2/memcpy.S
>>> +++ b/sysdeps/powerpc/powerpc32/a2/memcpy.S
>>> @@ -18,6 +18,7 @@
>>>     <https://www.gnu.org/licenses/>.  */
>>>  
>>>  #include <sysdep.h>
>>> +#include <rtld-global-offsets.h>
>>>  
>>>  #define PREFETCH_AHEAD 4        /* no cache lines SRC prefetching ahead  */
>>>  #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
>>> @@ -106,25 +107,23 @@ EALIGN (memcpy, 5, 0)
>>>  L(dst_aligned):
>>>  
>>>  
>>> -#ifdef SHARED
>>> +#ifdef PIC
>>>  	mflr    r0
>>> -/* Establishes GOT addressability so we can load __cache_line_size
>>> -   from static. This value was set from the aux vector during startup.  */
>>> +/* Establishes GOT addressability so we can load the cache line size
>>> +   from rtld_global_ro. This value was set from the aux vector during
>>> +   startup.  */
>>
>> My understanding is code guidelines states two spaces after the end of a 
>> sentence in comments.
> 
> Indeed.
> 
>>> diff --git a/sysdeps/powerpc/powerpc64/a2/memcpy.S b/sysdeps/powerpc/powerpc64/a2/memcpy.S
>>> index 6d5c5afddb..e515255126 100644
>>> --- a/sysdeps/powerpc/powerpc64/a2/memcpy.S
>>> +++ b/sysdeps/powerpc/powerpc64/a2/memcpy.S
>>> @@ -18,6 +18,7 @@
>>>     <https://www.gnu.org/licenses/>.  */
>>>  
>>>  #include <sysdep.h>
>>> +#include <rtld-global-offsets.h>
>>>  
>>>  #ifndef MEMCPY
>>>  # define MEMCPY memcpy
>>> @@ -27,8 +28,19 @@
>>>  #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
>>>  
>>>  	.section        ".toc","aw"
>>> -.LC0:
>>> -	.tc __cache_line_size[TC],__cache_line_size
>>> +.LC__dl_cache_line_size:
>>> +#ifdef SHARED
>>> +# if IS_IN (rtld)
>>> +	/* Inside ld.so we use the local alias to avoid runtime GOT
>>> +	   relocations.  */
>>> +	.tc _rtld_local_ro[TC],_rtld_local_ro
>>> +# else
>>> +	.tc _rtld_global_ro[TC],_rtld_global_ro
>>> +# endif
>>> +#else
>>> +	.tc _dl_cache_line_size[TC],_dl_cache_line_size
>>> +#endif
>>> +
>>>  	.section        ".text"
>>>  	.align 2
>>>  
>>
>> Ok. Maybe add a similar macro for powerpc64 as you did for powerpc32 with
>> __GLRO ?
> 
> Are you suggesting to create a __GLRO macro that would define this label?

Or something similar to avoid the duplicated defition here and at
sysdeps/powerpc/powerpc64/memset.S.

> 
>>> diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-support.c b/sysdeps/unix/sysv/linux/powerpc/dl-support.c
>>> new file mode 100644
>>> index 0000000000..2a792d7092
>>> --- /dev/null
>>> +++ b/sysdeps/unix/sysv/linux/powerpc/dl-support.c
>>> @@ -0,0 +1,23 @@
>>> +/* Support for dynamic linking code in static libc.  Linux/PPC version.
>>> +   Copyright (C) 2019 Free Software Foundation, Inc.
>>> +   This file is part of the GNU C Library.
>>> +
>>> +   The GNU C Library is free software; you can redistribute it and/or
>>> +   modify it under the terms of the GNU Lesser General Public
>>> +   License as published by the Free Software Foundation; either
>>> +   version 2.1 of the License, or (at your option) any later version.
>>> +
>>> +   The GNU C Library is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> +   Lesser General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU Lesser General Public
>>> +   License along with the GNU C Library; if not, see
>>> +   <https://www.gnu.org/licenses/>.  */
>>> +
>>> +#include "dl-auxv.h"
>>> +#include <ldsodefs.h>
>>> +int GLRO(dl_cache_line_size);
>>> +
>>> +#include <elf/dl-support.c>
>>
>> Why is it required? It sould be already covered by inclusion of
>> sysdeps/powerpc/dl-procinfo.c and it does not required the dl-auxv.h
>> definitions here.
> 
> This is the code that initializes it statically.  It avoids duplicating code.
> dl-procinfo.c does not copy AT_DCACHEBSIZE to GLRO(dl_cache_line_size).

But with a generic dl-auxv.h that defines DL_PLATFORM_AUXV, this
files is not really required. 

> 
>>> diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c b/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
>>> index fa19cc66c0..6d51d6061e 100644
>>> --- a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
>>> +++ b/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
>>> @@ -18,16 +18,6 @@
>>>  
>>>  #include <config.h>
>>>  #include <ldsodefs.h>
>>> -
>>> -int __cache_line_size attribute_hidden;
>>> -
>>> -/* Scan the Aux Vector for the "Data Cache Block Size" entry.  If found
>>> -   verify that the static extern __cache_line_size is defined by checking
>>> -   for not NULL.  If it is defined then assign the cache block size
>>> -   value to __cache_line_size.  */
>>> -#define DL_PLATFORM_AUXV						      \
>>> -      case AT_DCACHEBSIZE:						      \
>>> -	__cache_line_size = av->a_un.a_val;				      \
>>> -	break;
>>> +#include <dl-auxv.h>
>>>  
>>>  #include <sysdeps/unix/sysv/linux/dl-sysdep.c>
>>
>> This is essentially an empty file that just include dl-auxv.h. I think
>> it would be better to just create a generic empty dl-auxv.h, include it
>> on default dl-sysdep.c, and remove this file.
> 
> I'm fine with this proposal, but I don't think it's going to help neither alpha
> or x86 which have slightly different implementations.

Indeed, but I think we can refactor alpha or x86 later.
Florian Weimer Jan. 9, 2020, 10:29 a.m. UTC | #4
What's the status here?

I think it's desirable to fix this before the release.

Thanks,
Florian
Jeff Law Jan. 9, 2020, 4:43 p.m. UTC | #5
On Thu, 2020-01-09 at 11:29 +0100, Florian Weimer wrote:
> What's the status here?
> 
> I think it's desirable to fix this before the release.
Presumably this fixes the problem with it being used as a common symbol
and thus not working with gcc-10 on ppc64?  If so, yea, seems like it'd
be desirable before the release.

jeff
Tulio Magno Quites Machado Filho Jan. 9, 2020, 5:20 p.m. UTC | #6
Jeff Law <law@redhat.com> writes:

> On Thu, 2020-01-09 at 11:29 +0100, Florian Weimer wrote:
>> What's the status here?
>> 
>> I think it's desirable to fix this before the release.

While working on the changes requested by Adhemerval, I hit 2 other bugs:

1. getauxval() does not work well from a DSO dlopen'ed by a statically linked
   executable.
2. errno from a DSO is lost in a statically linked executable (BZ #25335).

I've fixed the first issue, but I'm stuck with the second one.

> Presumably this fixes the problem with it being used as a common symbol
> and thus not working with gcc-10 on ppc64?  If so, yea, seems like it'd
> be desirable before the release.

The tests I implemented are failing because of these issues.

Would it be fine if I disabled the failing tests while we do not have a fix
for BZ #25335?
Adhemerval Zanella Netto Jan. 9, 2020, 6:03 p.m. UTC | #7
On 09/01/2020 14:20, Tulio Magno Quites Machado Filho wrote:
> Jeff Law <law@redhat.com> writes:
> 
>> On Thu, 2020-01-09 at 11:29 +0100, Florian Weimer wrote:
>>> What's the status here?
>>>
>>> I think it's desirable to fix this before the release.
> 
> While working on the changes requested by Adhemerval, I hit 2 other bugs:
> 
> 1. getauxval() does not work well from a DSO dlopen'ed by a statically linked
>    executable.

This is BZ#20802 and I although we might fix it by adding the initialization
on dl-static.c, I think Florian suggestion [1] to proper initialize shared ld.so
variables is a better overall solution. However it is also a much more extensive 
change and not feasible on current release phase.

> 2. errno from a DSO is lost in a statically linked executable (BZ #25335).
> 
> I've fixed the first issue, but I'm stuck with the second one.

I don't see how easily we would fix it, it probability require to change
how errno is definde in static manner and most likely more hacks to use
the one from loaded libc.so. This would also require some discussion if it 
is really an expected scenario we should support.

> 
>> Presumably this fixes the problem with it being used as a common symbol
>> and thus not working with gcc-10 on ppc64?  If so, yea, seems like it'd
>> be desirable before the release.
> 
> The tests I implemented are failing because of these issues.
> 
> Would it be fine if I disabled the failing tests while we do not have a fix
> for BZ #25335?
> 

You can change the test to save/restore a passed errno in the wrapper
if it is required to check for errno value. I don't see provide a
fix for BZ#25535 as a blocker here.


[1] https://sourceware.org/ml/libc-alpha/2019-12/msg00483.html
Tulio Magno Quites Machado Filho Jan. 9, 2020, 6:49 p.m. UTC | #8
Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:

> This is BZ#20802 and I although we might fix it by adding the initialization
> on dl-static.c, I think Florian suggestion [1] to proper initialize shared ld.so
> variables is a better overall solution. However it is also a much more extensive 
> change and not feasible on current release phase.

Ack.

> I don't see how easily we would fix it, it probability require to change
> how errno is definde in static manner and most likely more hacks to use
> the one from loaded libc.so. This would also require some discussion if it 
> is really an expected scenario we should support.

Makes sense.

>> Would it be fine if I disabled the failing tests while we do not have a fix
>> for BZ #25335?
>
> You can change the test to save/restore a passed errno in the wrapper
> if it is required to check for errno value. I don't see provide a
> fix for BZ#25535 as a blocker here.

What are you referring as a wrapper in the getauxval() case?
Adhemerval Zanella Netto Jan. 9, 2020, 6:54 p.m. UTC | #9
On 09/01/2020 15:49, Tulio Magno Quites Machado Filho wrote:
> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
> 
>> This is BZ#20802 and I although we might fix it by adding the initialization
>> on dl-static.c, I think Florian suggestion [1] to proper initialize shared ld.so
>> variables is a better overall solution. However it is also a much more extensive 
>> change and not feasible on current release phase.
> 
> Ack.
> 
>> I don't see how easily we would fix it, it probability require to change
>> how errno is definde in static manner and most likely more hacks to use
>> the one from loaded libc.so. This would also require some discussion if it 
>> is really an expected scenario we should support.
> 
> Makes sense.
> 
>>> Would it be fine if I disabled the failing tests while we do not have a fix
>>> for BZ #25335?
>>
>> You can change the test to save/restore a passed errno in the wrapper
>> if it is required to check for errno value. I don't see provide a
>> fix for BZ#25535 as a blocker here.
> 
> What are you referring as a wrapper in the getauxval() case?
> 

Yes, on the test. Something like:

  unsigned long int
  getauxval_wrapper (unsigned long type, int *errnop)
  {
    errno = *errnop;
    unsigned long int r = getauxval (type);
    *errnop = errno;
    return r;
  }

And then you use on static build which will dlopen it:

  {
    unsigned long int (*wrapper)(unsigned long, int *)
      = xdlsym (handler, "getauxval_wrapper");
    int ierrno = 0;
    TEST_COMPARE (wrapper (...), ...);
    TEST_COMPARE (ierrno, ...);
  }
Tulio Magno Quites Machado Filho Jan. 9, 2020, 6:56 p.m. UTC | #10
Tulio Magno Quites Machado Filho <tuliom@ascii.art.br> writes:

> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
>
>> You can change the test to save/restore a passed errno in the wrapper
>> if it is required to check for errno value. I don't see provide a
>> fix for BZ#25535 as a blocker here.
>
> What are you referring as a wrapper in the getauxval() case?

Oh!  Forget it.  You're referring to Florian's patch.
I'll use that.

Thanks!
Florian Weimer Jan. 10, 2020, 9:38 a.m. UTC | #11
* Tulio Magno Quites Machado Filho:

> Jeff Law <law@redhat.com> writes:
>
>> On Thu, 2020-01-09 at 11:29 +0100, Florian Weimer wrote:
>>> What's the status here?
>>> 
>>> I think it's desirable to fix this before the release.
>
> While working on the changes requested by Adhemerval, I hit 2 other bugs:
>
> 1. getauxval() does not work well from a DSO dlopen'ed by a statically linked
>    executable.
> 2. errno from a DSO is lost in a statically linked executable (BZ #25335).
>
> I've fixed the first issue, but I'm stuck with the second one.

Based on the subsequent discussion, they are no longer blockers, right?

Thanks,
Florian
Shawn Landden Jan. 10, 2020, 4:09 p.m. UTC | #12
Not trying to say anything stupid, but why can't we just detect the cache line size using the "dcbz" instruction, as documented in the PowerISA document? It is because the ancient G5 also has a "dcbzl" instruction that clears a wider size (the actually cache line size)?
Adhemerval Zanella Netto Jan. 10, 2020, 6:19 p.m. UTC | #13
On 10/01/2020 13:09, Shawn Landden wrote:
> Not trying to say anything stupid, but why can't we just detect the cache line size using the "dcbz" instruction, as documented in the PowerISA document? It is because the ancient G5 also has a "dcbzl" instruction that clears a wider size (the actually cache line size)?
> 

AFAIK you can't detect the data cache block size using the dcbz instruction,
in fact PowerISA 3.0B states that this information should be provided by the
operation system (Book II, Chapter 4.1 Parameters Useful to Application
Programs).

Linux provided it by AT_DCACHEBSIZE field in auxv and it is implemented in the
kernel by a pre-defined table (arch/powerpc/kernel/cputable.c). This is how
glibc obtain such information (sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c).

The issue I pointed out is for static binaries that dlopen a shared library,
mem* calls done by the library (which will call the loaded libc.so one) won't
see the field properly initialized. Some architectures fix rtld initialization
by reimplementing the _dl_static_init.  It looks up for the '_dl_var_init' and
calls it, taking care of relro segments by unmap/mmap the segment.

And the dcbzl seems to be a hack pushed by Apple for some reason, which should
not be required on Linux:

"dcbz" only operates on 32 bytes when the special HID5 compatiblity bit that 
apple added to the 970 is set. This is _NOT_ the normal case and this bit isn't 
set in linux unless you explicitely set it by modifying the kernel [1]." 

[1] https://lists.ozlabs.org/pipermail/linuxppc64-dev/2004-March/001383.html
diff mbox series

Patch

diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
index 20706a648a..94b33d0c16 100644
--- a/sysdeps/powerpc/dl-procinfo.c
+++ b/sysdeps/powerpc/dl-procinfo.c
@@ -89,5 +89,22 @@  PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15]
 ,
 #endif
 
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+     ._dl_cache_line_size
+# else
+PROCINFO_CLASS int _dl_cache_line_size
+# endif
+# ifndef PROCINFO_DECL
+     = 0
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
+
 #undef PROCINFO_DECL
 #undef PROCINFO_CLASS
diff --git a/sysdeps/powerpc/powerpc32/a2/memcpy.S b/sysdeps/powerpc/powerpc32/a2/memcpy.S
index 9bc91a8df1..ecfea5c832 100644
--- a/sysdeps/powerpc/powerpc32/a2/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/a2/memcpy.S
@@ -18,6 +18,7 @@ 
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <rtld-global-offsets.h>
 
 #define PREFETCH_AHEAD 4        /* no cache lines SRC prefetching ahead  */
 #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
@@ -106,25 +107,23 @@  EALIGN (memcpy, 5, 0)
 L(dst_aligned):
 
 
-#ifdef SHARED
+#ifdef PIC
 	mflr    r0
-/* Establishes GOT addressability so we can load __cache_line_size
-   from static. This value was set from the aux vector during startup.  */
+/* Establishes GOT addressability so we can load the cache line size
+   from rtld_global_ro. This value was set from the aux vector during
+   startup.  */
 	SETUP_GOT_ACCESS(r9,got_label)
-	addis   r9,r9,__cache_line_size-got_label@ha
-	lwz     r9,__cache_line_size-got_label@l(r9)
-	mtlr    r0
-#else
-/* Load __cache_line_size from static. This value was set from the
-   aux vector during startup.  */
-	lis     r9,__cache_line_size@ha
-	lwz     r9,__cache_line_size@l(r9)
+	addis	r9,r9,_GLOBAL_OFFSET_TABLE_-got_label@ha
+	addi	r9,r9,_GLOBAL_OFFSET_TABLE_-got_label@l
+	mtlr	r0
 #endif
+	__GLRO(r9, r9, _dl_cache_line_size,
+	       RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)
 
 	cmplwi  cr5, r9, 0
 	bne+    cr5,L(cachelineset)
 
-/* __cache_line_size not set: generic byte copy without much optimization */
+/* Cache line size not set: generic byte copy without much optimization */
 	andi.	r0,r5,1		/* If length is odd copy one byte.  */
 	beq	L(cachelinenotset_align)
 	lbz	r7,0(r4)	/* Read one byte from source.  */
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c
index a90cbc1ae3..740ff8f2ec 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.c
+++ b/sysdeps/powerpc/powerpc32/dl-machine.c
@@ -25,11 +25,6 @@ 
 #include <dl-machine.h>
 #include <_itoa.h>
 
-/* The value __cache_line_size is defined in dl-sysdep.c and is initialised
-   by _dl_sysdep_start via DL_PLATFORM_INIT.  */
-extern int __cache_line_size attribute_hidden;
-
-
 /* Stuff for the PLT.  */
 #define PLT_INITIAL_ENTRY_WORDS 18
 #define PLT_LONGBRANCH_ENTRY_WORDS 0
@@ -309,14 +304,14 @@  __elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
 
 	 Assumes that dcbst and icbi apply to lines of 16 bytes or
 	 more.  Current known line sizes are 16, 32, and 128 bytes.
-	 The following gets the __cache_line_size, when available.  */
+	 The following gets the cache line size, when available.  */
 
       /* Default minimum 4 words per cache line.  */
       int line_size_words = 4;
 
-      if (lazy && __cache_line_size != 0)
+      if (lazy && GLRO(dl_cache_line_size) != 0)
 	/* Convert bytes to words.  */
-	line_size_words = __cache_line_size / 4;
+	line_size_words = GLRO(dl_cache_line_size) / 4;
 
       size_modified = lazy ? rel_offset_words : 6;
       for (i = 0; i < size_modified; i += line_size_words)
diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
index fc8b1a2547..ea96913e4e 100644
--- a/sysdeps/powerpc/powerpc32/memset.S
+++ b/sysdeps/powerpc/powerpc32/memset.S
@@ -17,12 +17,13 @@ 
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <rtld-global-offsets.h>
 
 /* void * [r3] memset (void *s [r3], int c [r4], size_t n [r5]));
    Returns 's'.
 
    The memset is done in four sizes: byte (8 bits), word (32 bits),
-   32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
+   32-byte blocks (256 bits) and cache line size (128, 256, 1024 bits).
    There is a special case for setting whole cache lines to 0, which
    takes advantage of the dcbz instruction.  */
 
@@ -95,7 +96,7 @@  L(caligned):
 
 /* Check if we can use the special case for clearing memory using dcbz.
    This requires that we know the correct cache line size for this
-   processor.  Getting the __cache_line_size may require establishing GOT
+   processor.  Getting the cache line size may require establishing GOT
    addressability, so branch out of line to set this up.  */
 	beq	cr1, L(checklinesize)
 
@@ -230,26 +231,22 @@  L(medium_28t):
 	blr
 
 L(checklinesize):
-#ifdef SHARED
-	mflr	rTMP
 /* If the remaining length is less the 32 bytes then don't bother getting
    the cache line size.  */
 	beq	L(medium)
-/* Establishes GOT addressability so we can load __cache_line_size
-   from static. This value was set from the aux vector during startup.  */
+#ifdef PIC
+	mflr	rTMP
+/* Establishes GOT addressability so we can load the cache line size
+   from rtld_global_ro. This value was set from the aux vector during
+   startup.  */
 	SETUP_GOT_ACCESS(rGOT,got_label)
-	addis	rGOT,rGOT,__cache_line_size-got_label@ha
-	lwz	rCLS,__cache_line_size-got_label@l(rGOT)
+	addis	rGOT,rGOT,_GLOBAL_OFFSET_TABLE_-got_label@ha
+	addi	rGOT,rGOT,_GLOBAL_OFFSET_TABLE_-got_label@l
 	mtlr	rTMP
-#else
-/* Load __cache_line_size from static. This value was set from the
-   aux vector during startup.  */
-	lis	rCLS,__cache_line_size@ha
-/* If the remaining length is less the 32 bytes then don't bother getting
-   the cache line size.  */
-	beq	L(medium)
-	lwz	rCLS,__cache_line_size@l(rCLS)
 #endif
+/* Load rtld_global_ro._dl_cache_line_size.  */
+	__GLRO(rCLS, rGOT, _dl_cache_line_size,
+	       RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)
 
 /* If the cache line size was not set then goto to L(nondcbz), which is
    safe for any cache line size.  */
diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h
index c21ea87f5a..4605276ebf 100644
--- a/sysdeps/powerpc/powerpc32/sysdep.h
+++ b/sysdeps/powerpc/powerpc32/sysdep.h
@@ -157,4 +157,30 @@  GOT_LABEL:			;					      \
 /* Label in text section.  */
 #define C_TEXT(name) name
 
+/* Read the value of member from rtld_global_ro.  */
+#ifdef PIC
+# ifdef SHARED
+#  if IS_IN (rtld)
+/* Inside ld.so we use the local alias to avoid runtime GOT
+   relocations.  */
+#   define __GLRO(rOUT, rGOT, member, offset)				\
+	lwz     rOUT,_rtld_local_ro@got(rGOT);				\
+	lwz     rOUT,offset(rOUT)
+#  else
+#   define __GLRO(rOUT, rGOT, member, offset)				\
+	lwz     rOUT,_rtld_global_ro@got(rGOT);				\
+	lwz     rOUT,offset(rOUT)
+#  endif
+# else
+#  define __GLRO(rOUT, rGOT, member, offset)				\
+	lwz     rOUT,member@got(rGOT);					\
+	lwz     rOUT,0(rOUT)
+# endif
+#else
+/* Position-dependent code does not require access to the GOT.  */
+# define __GLRO(rOUT, rGOT, member, offset)				\
+	lis     rOUT,(member+LOWORD)@ha					\
+	lwz     rOUT,(member+LOWORD)@l(rOUT)
+#endif	/* PIC */
+
 #endif	/* __ASSEMBLER__ */
diff --git a/sysdeps/powerpc/powerpc64/a2/memcpy.S b/sysdeps/powerpc/powerpc64/a2/memcpy.S
index 6d5c5afddb..e515255126 100644
--- a/sysdeps/powerpc/powerpc64/a2/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/a2/memcpy.S
@@ -18,6 +18,7 @@ 
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <rtld-global-offsets.h>
 
 #ifndef MEMCPY
 # define MEMCPY memcpy
@@ -27,8 +28,19 @@ 
 #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
 
 	.section        ".toc","aw"
-.LC0:
-	.tc __cache_line_size[TC],__cache_line_size
+.LC__dl_cache_line_size:
+#ifdef SHARED
+# if IS_IN (rtld)
+	/* Inside ld.so we use the local alias to avoid runtime GOT
+	   relocations.  */
+	.tc _rtld_local_ro[TC],_rtld_local_ro
+# else
+	.tc _rtld_global_ro[TC],_rtld_global_ro
+# endif
+#else
+	.tc _dl_cache_line_size[TC],_dl_cache_line_size
+#endif
+
 	.section        ".text"
 	.align 2
 
@@ -55,7 +67,8 @@  ENTRY (MEMCPY, 5)
 	*/
 
 	neg     r8,r3           /* LS 4 bits = # bytes to 8-byte dest bdry  */
-	ld      r9,.LC0@toc(r2) /* Get cache line size (part 1) */
+	/* Get cache line size (part 1) */
+	ld      r9,.LC__dl_cache_line_size@toc(r2)
 	clrldi  r8,r8,64-4      /* align to 16byte boundary  */
 	sub     r7,r4,r3        /* compute offset to src from dest */
 	lwz     r9,0(r9)        /* Get cache line size (part 2) */
@@ -121,7 +134,7 @@  L(dst_aligned):
 	cmpdi	cr0,r9,0	/* Cache line size set? */
 	bne+	cr0,L(cachelineset)
 
-/* __cache_line_size not set: generic byte copy without much optimization */
+/* Cache line size not set: generic byte copy without much optimization */
 	clrldi.	r0,r5,63	/* If length is odd copy one byte */
 	beq	L(cachelinenotset_align)
 	lbz	r7,0(r4)	/* Read one byte from source */
diff --git a/sysdeps/powerpc/powerpc64/memset.S b/sysdeps/powerpc/powerpc64/memset.S
index 5d96696e87..d49ef102d4 100644
--- a/sysdeps/powerpc/powerpc64/memset.S
+++ b/sysdeps/powerpc/powerpc64/memset.S
@@ -17,10 +17,22 @@ 
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <rtld-global-offsets.h>
 
 	.section	".toc","aw"
-.LC0:
-	.tc __cache_line_size[TC],__cache_line_size
+.LC__dl_cache_line_size:
+#ifdef SHARED
+# if IS_IN (rtld)
+	/* Inside ld.so we use the local alias to avoid runtime GOT
+	   relocations.  */
+	.tc _rtld_local_ro[TC],_rtld_local_ro
+# else
+	.tc _rtld_global_ro[TC],_rtld_global_ro
+# endif
+#else
+	.tc _dl_cache_line_size[TC],_dl_cache_line_size
+#endif
+
 	.section	".text"
 	.align 2
 
@@ -146,8 +158,14 @@  L(zloopstart):
 /* If the remaining length is less the 32 bytes, don't bother getting
 	 the cache line size.  */
 	beq	L(medium)
-	ld	rCLS,.LC0@toc(r2)
+	ld	rCLS,.LC__dl_cache_line_size@toc(r2)
+# ifdef SHARED
+	/* Load _rtld_global_ro._dl_cache_line_size.  */
+	lwz	rCLS,RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET(rCLS)
+# else
+	/* Load _dl_cache_line_size.  */
 	lwz	rCLS,0(rCLS)
+# endif
 /* If the cache line size was not set just goto to L(nondcbz) which is
 	 safe for any cache line size.  */
 	cmpldi	cr1,rCLS,0
diff --git a/sysdeps/powerpc/rtld-global-offsets.sym b/sysdeps/powerpc/rtld-global-offsets.sym
index f5ea5a1466..6b348fd522 100644
--- a/sysdeps/powerpc/rtld-global-offsets.sym
+++ b/sysdeps/powerpc/rtld-global-offsets.sym
@@ -6,3 +6,4 @@ 
 
 RTLD_GLOBAL_RO_DL_HWCAP_OFFSET	rtld_global_ro_offsetof (_dl_hwcap)
 RTLD_GLOBAL_RO_DL_HWCAP2_OFFSET	rtld_global_ro_offsetof (_dl_hwcap2)
+RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET	rtld_global_ro_offsetof (_dl_cache_line_size)
diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
new file mode 100644
index 0000000000..44bea09974
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
@@ -0,0 +1,24 @@ 
+/* Auxiliary vector processing.  Linux/PPC version.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Scan the Aux Vector for the "Data Cache Block Size" entry and assign it
+   to dl_cache_line_size.  */
+#define DL_PLATFORM_AUXV						      \
+      case AT_DCACHEBSIZE:						      \
+	GLRO(dl_cache_line_size) = av->a_un.a_val;			      \
+	break;
diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-support.c b/sysdeps/unix/sysv/linux/powerpc/dl-support.c
new file mode 100644
index 0000000000..2a792d7092
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/dl-support.c
@@ -0,0 +1,23 @@ 
+/* Support for dynamic linking code in static libc.  Linux/PPC version.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "dl-auxv.h"
+#include <ldsodefs.h>
+int GLRO(dl_cache_line_size);
+
+#include <elf/dl-support.c>
diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c b/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
index fa19cc66c0..6d51d6061e 100644
--- a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
+++ b/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
@@ -18,16 +18,6 @@ 
 
 #include <config.h>
 #include <ldsodefs.h>
-
-int __cache_line_size attribute_hidden;
-
-/* Scan the Aux Vector for the "Data Cache Block Size" entry.  If found
-   verify that the static extern __cache_line_size is defined by checking
-   for not NULL.  If it is defined then assign the cache block size
-   value to __cache_line_size.  */
-#define DL_PLATFORM_AUXV						      \
-      case AT_DCACHEBSIZE:						      \
-	__cache_line_size = av->a_un.a_val;				      \
-	break;
+#include <dl-auxv.h>
 
 #include <sysdeps/unix/sysv/linux/dl-sysdep.c>
diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
index a659a9144f..3a5eb0e952 100644
--- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
+++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
@@ -24,7 +24,6 @@ 
 #include <hwcapinfo.h>
 #endif
 
-int __cache_line_size attribute_hidden;
 /* The main work is done in the generic function.  */
 #define LIBC_START_MAIN generic_start_main
 #define LIBC_START_DISABLE_INLINE
@@ -71,15 +70,12 @@  __libc_start_main (int argc, char **argv,
       rtld_fini = NULL;
     }
 
-  /* Initialize the __cache_line_size variable from the aux vector.  For the
-     static case, we also need _dl_hwcap, _dl_hwcap2 and _dl_platform, so we
-     can call __tcb_parse_hwcap_and_convert_at_platform ().  */
   for (ElfW (auxv_t) * av = auxvec; av->a_type != AT_NULL; ++av)
     switch (av->a_type)
       {
-      case AT_DCACHEBSIZE:
-	__cache_line_size = av->a_un.a_val;
-	break;
+      /* For the static case, we also need _dl_hwcap, _dl_hwcap2 and
+         _dl_platform, so we can call
+         __tcb_parse_hwcap_and_convert_at_platform ().  */
 #ifndef SHARED
       case AT_HWCAP:
 	_dl_hwcap = (unsigned long int) av->a_un.a_val;