diff mbox

[1/2] Add private_function for private functions within glibc

Message ID CAMe9rOrMc4drUDum4ku+CEXSM8xafdj-0UYTAGfc-v8GEM6=xg@mail.gmail.com
State New
Headers show

Commit Message

H.J. Lu June 22, 2017, 4:21 p.m. UTC
On Thu, Jun 22, 2017 at 7:41 AM, Florian Weimer <fweimer@redhat.com> wrote:
> On 06/22/2017 04:38 PM, H.J. Lu wrote:
>> On Thu, Jun 22, 2017 at 6:40 AM, Florian Weimer <fweimer@redhat.com> wrote:
>>> On 06/17/2017 03:59 PM, H.J. Lu wrote:
>>>> Here parameters are passed to _dl_init in registers.  I want to minimize
>>>> changes to avoid any potential issues.
>>>
>>> Well, as a rule of thumb, if we do something that breaks our own code,
>>> it is pretty much guaranteed to wreak havoc across the board (because
>>> our test coverage is somewhat poor).
>>>
>>> I see a lot of use of regparm (3).  For example:
>>>
>>> $ echo '#include <Qt/qchar.h>' | g++ -m32 -E -x c++ - | grep regparm
>>>     static Category __attribute__((regparm(3))) category(uint ucs4);
>>>     static Category __attribute__((regparm(3))) category(ushort ucs2);
>>>     static Direction __attribute__((regparm(3))) direction(uint ucs4);
>>>     static Direction __attribute__((regparm(3))) direction(ushort ucs2);
>>>     static Joining __attribute__((regparm(3))) joining(uint ucs4);
>>> …
>>>
>>> I think these calls actually cross DSO boundaries.
>>
>> I don't think so.  See "static ...".
>
> It's C++ code, so it just means there's no this pointer.
>

How about this patch?

Comments

Florian Weimer June 22, 2017, 4:37 p.m. UTC | #1
On 06/22/2017 06:21 PM, H.J. Lu wrote:
> On Thu, Jun 22, 2017 at 7:41 AM, Florian Weimer <fweimer@redhat.com> wrote:
>> On 06/22/2017 04:38 PM, H.J. Lu wrote:
>>> On Thu, Jun 22, 2017 at 6:40 AM, Florian Weimer <fweimer@redhat.com> wrote:
>>>> On 06/17/2017 03:59 PM, H.J. Lu wrote:
>>>>> Here parameters are passed to _dl_init in registers.  I want to minimize
>>>>> changes to avoid any potential issues.
>>>> Well, as a rule of thumb, if we do something that breaks our own code,
>>>> it is pretty much guaranteed to wreak havoc across the board (because
>>>> our test coverage is somewhat poor).
>>>>
>>>> I see a lot of use of regparm (3).  For example:
>>>>
>>>> $ echo '#include <Qt/qchar.h>' | g++ -m32 -E -x c++ - | grep regparm
>>>>     static Category __attribute__((regparm(3))) category(uint ucs4);
>>>>     static Category __attribute__((regparm(3))) category(ushort ucs2);
>>>>     static Direction __attribute__((regparm(3))) direction(uint ucs4);
>>>>     static Direction __attribute__((regparm(3))) direction(ushort ucs2);
>>>>     static Joining __attribute__((regparm(3))) joining(uint ucs4);
>>>> …
>>>>
>>>> I think these calls actually cross DSO boundaries.
>>> I don't think so.  See "static ...".
>> It's C++ code, so it just means there's no this pointer.
>>
> How about this patch?

Yes, it addresses my concerns.

> +# The SHSTK compatible version.
> +	.text
> +	.globl _dl_runtime_resolve_shstk
> +	.type _dl_runtime_resolve_shstk, @function
> +	cfi_startproc
> +	.align 16
> +_dl_runtime_resolve_shstk:
> +	cfi_adjust_cfa_offset (8)
> +	pushl %eax		# Preserve registers otherwise clobbered.
> +	cfi_adjust_cfa_offset (4)
> +	pushl %edx
> +	cfi_adjust_cfa_offset (4)
> +	movl 12(%esp), %edx	# Copy args pushed by PLT in register.  Note
> +	movl 8(%esp), %eax	# that `fixup' takes its parameters in regs.
> +	call _dl_fixup		# Call resolver.
> +	movl (%esp), %edx	# Get register content back.
> +	movl %eax, %ecx		# Store the function address.
> +	movl 4(%esp), %eax	# Get register content back.
> +	addl $16, %esp		# Adjust stack: PLT1 + PLT2 + %eax + %edx
> +	cfi_adjust_cfa_offset (-16)
> +	jmp *%ecx		# Jump to function address.
> +	cfi_endproc
> +	.size _dl_runtime_resolve_shstk, .-_dl_runtime_resolve_shstk

Are the CFI annotations correct?  I think the offset after the push %eax
should be 8, not 12, as it currently appears to be.  CFI annotations for
the spill slots of %eax and %edx would be nice. too.

It seems to me that _dl_fixup is called with an unaligned stack, either
in this implementation or the existing one.  Don't we care about this
because we compile the dynamic linker without SSE support?

Thanks,
Florian
H.J. Lu June 22, 2017, 4:58 p.m. UTC | #2
On Thu, Jun 22, 2017 at 9:37 AM, Florian Weimer <fweimer@redhat.com> wrote:
> On 06/22/2017 06:21 PM, H.J. Lu wrote:
>> On Thu, Jun 22, 2017 at 7:41 AM, Florian Weimer <fweimer@redhat.com> wrote:
>>> On 06/22/2017 04:38 PM, H.J. Lu wrote:
>>>> On Thu, Jun 22, 2017 at 6:40 AM, Florian Weimer <fweimer@redhat.com> wrote:
>>>>> On 06/17/2017 03:59 PM, H.J. Lu wrote:
>>>>>> Here parameters are passed to _dl_init in registers.  I want to minimize
>>>>>> changes to avoid any potential issues.
>>>>> Well, as a rule of thumb, if we do something that breaks our own code,
>>>>> it is pretty much guaranteed to wreak havoc across the board (because
>>>>> our test coverage is somewhat poor).
>>>>>
>>>>> I see a lot of use of regparm (3).  For example:
>>>>>
>>>>> $ echo '#include <Qt/qchar.h>' | g++ -m32 -E -x c++ - | grep regparm
>>>>>     static Category __attribute__((regparm(3))) category(uint ucs4);
>>>>>     static Category __attribute__((regparm(3))) category(ushort ucs2);
>>>>>     static Direction __attribute__((regparm(3))) direction(uint ucs4);
>>>>>     static Direction __attribute__((regparm(3))) direction(ushort ucs2);
>>>>>     static Joining __attribute__((regparm(3))) joining(uint ucs4);
>>>>> …
>>>>>
>>>>> I think these calls actually cross DSO boundaries.
>>>> I don't think so.  See "static ...".
>>> It's C++ code, so it just means there's no this pointer.
>>>
>> How about this patch?
>
> Yes, it addresses my concerns.
>
>> +# The SHSTK compatible version.
>> +     .text
>> +     .globl _dl_runtime_resolve_shstk
>> +     .type _dl_runtime_resolve_shstk, @function
>> +     cfi_startproc
>> +     .align 16
>> +_dl_runtime_resolve_shstk:
>> +     cfi_adjust_cfa_offset (8)
>> +     pushl %eax              # Preserve registers otherwise clobbered.
>> +     cfi_adjust_cfa_offset (4)
>> +     pushl %edx
>> +     cfi_adjust_cfa_offset (4)
>> +     movl 12(%esp), %edx     # Copy args pushed by PLT in register.  Note
>> +     movl 8(%esp), %eax      # that `fixup' takes its parameters in regs.
>> +     call _dl_fixup          # Call resolver.
>> +     movl (%esp), %edx       # Get register content back.
>> +     movl %eax, %ecx         # Store the function address.
>> +     movl 4(%esp), %eax      # Get register content back.
>> +     addl $16, %esp          # Adjust stack: PLT1 + PLT2 + %eax + %edx
>> +     cfi_adjust_cfa_offset (-16)
>> +     jmp *%ecx               # Jump to function address.
>> +     cfi_endproc
>> +     .size _dl_runtime_resolve_shstk, .-_dl_runtime_resolve_shstk
>
> Are the CFI annotations correct?  I think the offset after the push %eax
> should be 8, not 12, as it currently appears to be.  CFI annotations for

There are 2 pushes for PLT and add push %eax.  The offset is 12.

> the spill slots of %eax and %edx would be nice. too.

Don't we have

pushl %eax # Preserve registers otherwise clobbered.
cfi_adjust_cfa_offset (4)
pushl %edx
cfi_adjust_cfa_offset (4)


> It seems to me that _dl_fixup is called with an unaligned stack, either
> in this implementation or the existing one.  Don't we care about this
> because we compile the dynamic linker without SSE support?
>

Probably.  But it is a separate issue.
diff mbox

Patch

From bc18b43086c2ac1efb379ba83aae351d532963f7 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Thu, 22 Jun 2017 08:51:42 -0700
Subject: [PATCH] i386: Add _dl_runtime_resolve_shstk [BZ #21598]

Add a SHSTK compatible symbol resolver to support Shadow Stack in Intel
Control-flow Enforcement Technology (CET) instructions:

    https://software.intel.com/sites/default/files/managed/4d/2a/control-flow-enforcement-technology-preview.pdf

	[BZ #21598]
	* sysdeps/i386/dl-machine.h (elf_machine_runtime_setup): Use
	_dl_runtime_resolve_shstk if SHSTK is usable.
	* sysdeps/i386/dl-trampoline.S (_dl_runtime_resolve_shstk): New.
	* sysdeps/x86/cpu-features.h (bit_arch_IBT_Usable): New.
	(bit_arch_SHSTK_Usable): Likewise.
	(bit_cpu_SHSTK): Likewise.
	(index_cpu_IBT): Likewise.
	(index_cpu_SHSTK): Likewise.
	(index_arch_IBT_Usable): Likewise.
	(index_arch_SHSTK_Usable): Likewise.
	(reg_IBT): Likewise.
	(reg_SHSTK): Likewise.
---
 sysdeps/i386/dl-machine.h    |  5 ++++-
 sysdeps/i386/dl-trampoline.S | 23 +++++++++++++++++++++++
 sysdeps/x86/cpu-features.h   | 14 ++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index 9ee9d02..598608e 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -66,6 +66,7 @@  elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 {
   Elf32_Addr *got;
   extern void _dl_runtime_resolve (Elf32_Word) attribute_hidden;
+  extern void _dl_runtime_resolve_shstk (Elf32_Word) attribute_hidden;
   extern void _dl_runtime_profile (Elf32_Word) attribute_hidden;
 
   if (l->l_info[DT_JMPREL] && lazy)
@@ -104,7 +105,9 @@  elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
       else
 	/* This function will get called to fix up the GOT entry indicated by
 	   the offset on the stack, and then jump to the resolved address.  */
-	got[2] = (Elf32_Addr) &_dl_runtime_resolve;
+	got[2] = (HAS_ARCH_FEATURE (SHSTK_Usable)
+		  ? (Elf32_Addr) &_dl_runtime_resolve_shstk
+		  : (Elf32_Addr) &_dl_runtime_resolve);
     }
 
   return lazy;
diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S
index 6e7f3ae..92029f7 100644
--- a/sysdeps/i386/dl-trampoline.S
+++ b/sysdeps/i386/dl-trampoline.S
@@ -50,6 +50,29 @@  _dl_runtime_resolve:
 	cfi_endproc
 	.size _dl_runtime_resolve, .-_dl_runtime_resolve
 
+# The SHSTK compatible version.
+	.text
+	.globl _dl_runtime_resolve_shstk
+	.type _dl_runtime_resolve_shstk, @function
+	cfi_startproc
+	.align 16
+_dl_runtime_resolve_shstk:
+	cfi_adjust_cfa_offset (8)
+	pushl %eax		# Preserve registers otherwise clobbered.
+	cfi_adjust_cfa_offset (4)
+	pushl %edx
+	cfi_adjust_cfa_offset (4)
+	movl 12(%esp), %edx	# Copy args pushed by PLT in register.  Note
+	movl 8(%esp), %eax	# that `fixup' takes its parameters in regs.
+	call _dl_fixup		# Call resolver.
+	movl (%esp), %edx	# Get register content back.
+	movl %eax, %ecx		# Store the function address.
+	movl 4(%esp), %eax	# Get register content back.
+	addl $16, %esp		# Adjust stack: PLT1 + PLT2 + %eax + %edx
+	cfi_adjust_cfa_offset (-16)
+	jmp *%ecx		# Jump to function address.
+	cfi_endproc
+	.size _dl_runtime_resolve_shstk, .-_dl_runtime_resolve_shstk
 
 #ifndef PROF
 	.globl _dl_runtime_profile
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 3ed67f5..d2a9929 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -40,6 +40,8 @@ 
 #define bit_arch_Use_dl_runtime_resolve_opt	(1 << 20)
 #define bit_arch_Use_dl_runtime_resolve_slow	(1 << 21)
 #define bit_arch_Prefer_No_AVX512		(1 << 22)
+#define bit_arch_IBT_Usable			(1 << 23)
+#define bit_arch_SHSTK_Usable			(1 << 24)
 
 /* CPUID Feature flags.  */
 
@@ -74,6 +76,8 @@ 
 #define bit_cpu_AVX512CD	(1 << 28)
 #define bit_cpu_AVX512BW	(1 << 30)
 #define bit_cpu_AVX512VL	(1u << 31)
+#define bit_cpu_IBT		(1u << 20)
+#define bit_cpu_SHSTK		(1u << 7)
 
 /* XCR0 Feature flags.  */
 #define bit_XMM_state		(1 << 1)
@@ -103,6 +107,8 @@ 
 # define index_cpu_AVX2	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
 # define index_cpu_ERMS	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
 # define index_cpu_MOVBE COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_cpu_IBT	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EDX_OFFSET
+# define index_cpu_SHSTK COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_ECX_OFFSET
 
 # define index_arch_Fast_Rep_String	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
@@ -126,6 +132,8 @@ 
 # define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE
 # define index_arch_Prefer_No_AVX512	FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_IBT_Usable		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_SHSTK_Usable	FEATURE_INDEX_1*FEATURE_SIZE
 
 
 # if defined (_LIBC) && !IS_IN (nonlib)
@@ -277,6 +285,8 @@  extern const struct cpu_features *__get_cpu_features (void)
 # define index_cpu_LZCNT	COMMON_CPUID_INDEX_1
 # define index_cpu_MOVBE	COMMON_CPUID_INDEX_1
 # define index_cpu_POPCNT	COMMON_CPUID_INDEX_1
+# define index_cpu_IBT		COMMON_CPUID_INDEX_7
+# define index_cpu_SHSTK	COMMON_CPUID_INDEX_7
 
 # define reg_CX8		edx
 # define reg_CMOV		edx
@@ -306,6 +316,8 @@  extern const struct cpu_features *__get_cpu_features (void)
 # define reg_LZCNT		ecx
 # define reg_MOVBE		ecx
 # define reg_POPCNT		ecx
+# define reg_IBT		edx
+# define reg_SHSTK		ecx
 
 # define index_arch_Fast_Rep_String	FEATURE_INDEX_1
 # define index_arch_Fast_Copy_Backward	FEATURE_INDEX_1
@@ -329,6 +341,8 @@  extern const struct cpu_features *__get_cpu_features (void)
 # define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1
 # define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1
 # define index_arch_Prefer_No_AVX512	FEATURE_INDEX_1
+# define index_arch_IBT_Usable		FEATURE_INDEX_1
+# define index_arch_SHSTK_Usable	FEATURE_INDEX_1
 
 #endif	/* !__ASSEMBLER__ */
 
-- 
2.9.4