Message ID | 20210817060532.1210408-1-maskray@google.com |
---|---|
State | New |
Headers | show |
Series | x86_64: Simplify elf_machine_{load_address,dynamic} | expand |
On Mon, Aug 16, 2021 at 11:24 PM Fangrui Song via Libc-alpha <libc-alpha@sourceware.org> wrote: > > and drop reliance on _GLOBAL_OFFSET_TABLE_[0]. > > &__ehdr_start is a better way to get the load address. > --- > sysdeps/x86_64/dl-machine.h | 25 +++++++++---------------- > 1 file changed, 9 insertions(+), 16 deletions(-) > > diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h > index a8596aa3fa..ceee50734e 100644 > --- a/sysdeps/x86_64/dl-machine.h > +++ b/sysdeps/x86_64/dl-machine.h > @@ -35,27 +35,20 @@ elf_machine_matches_host (const ElfW(Ehdr) *ehdr) > } > > > -/* Return the link-time address of _DYNAMIC. Conveniently, this is the > - first element of the GOT. This must be inlined in a function which > - uses global data. */ > -static inline ElfW(Addr) __attribute__ ((unused)) > -elf_machine_dynamic (void) > -{ > - /* This produces an IP-relative reloc which is resolved at link time. */ > - extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden; > - return _GLOBAL_OFFSET_TABLE_[0]; > -} > - > - > /* Return the run-time load address of the shared object. */ > static inline ElfW(Addr) __attribute__ ((unused)) > elf_machine_load_address (void) > { > - /* Compute the difference between the runtime address of _DYNAMIC as seen > - by an IP-relative reference, and the link-time address found in the > - special unrelocated first GOT entry. */ > + extern const ElfW(Ehdr) __ehdr_start attribute_hidden; > + return (ElfW(Addr)) &__ehdr_start; > +} > + > +/* Return the link-time address of _DYNAMIC. 
*/ > +static inline ElfW(Addr) __attribute__ ((unused)) > +elf_machine_dynamic (void) > +{ > extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; > - return (ElfW(Addr)) &_DYNAMIC - elf_machine_dynamic (); > + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); > } > > /* Set up the loaded object described by L so its unrelocated PLT > -- > 2.33.0.rc1.237.g0d66db33f3-goog > Please provide a comparison of ld.so with and without the change, in terms of code size, code sequence, and dynamic relocations. Thanks.
On 2021-08-17, H.J. Lu wrote: >On Mon, Aug 16, 2021 at 11:24 PM Fangrui Song via Libc-alpha ><libc-alpha@sourceware.org> wrote: >> >> and drop reliance on _GLOBAL_OFFSET_TABLE_[0]. >> >> &__ehdr_start is a better way to get the load address. >> --- >> sysdeps/x86_64/dl-machine.h | 25 +++++++++---------------- >> 1 file changed, 9 insertions(+), 16 deletions(-) >> >> diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h >> index a8596aa3fa..ceee50734e 100644 >> --- a/sysdeps/x86_64/dl-machine.h >> +++ b/sysdeps/x86_64/dl-machine.h >> @@ -35,27 +35,20 @@ elf_machine_matches_host (const ElfW(Ehdr) *ehdr) >> } >> >> >> -/* Return the link-time address of _DYNAMIC. Conveniently, this is the >> - first element of the GOT. This must be inlined in a function which >> - uses global data. */ >> -static inline ElfW(Addr) __attribute__ ((unused)) >> -elf_machine_dynamic (void) >> -{ >> - /* This produces an IP-relative reloc which is resolved at link time. */ >> - extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden; >> - return _GLOBAL_OFFSET_TABLE_[0]; >> -} >> - >> - >> /* Return the run-time load address of the shared object. */ >> static inline ElfW(Addr) __attribute__ ((unused)) >> elf_machine_load_address (void) >> { >> - /* Compute the difference between the runtime address of _DYNAMIC as seen >> - by an IP-relative reference, and the link-time address found in the >> - special unrelocated first GOT entry. */ >> + extern const ElfW(Ehdr) __ehdr_start attribute_hidden; >> + return (ElfW(Addr)) &__ehdr_start; >> +} >> + >> +/* Return the link-time address of _DYNAMIC. 
*/ >> +static inline ElfW(Addr) __attribute__ ((unused)) >> +elf_machine_dynamic (void) >> +{ >> extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; >> - return (ElfW(Addr)) &_DYNAMIC - elf_machine_dynamic (); >> + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); >> } >> >> /* Set up the loaded object described by L so its unrelocated PLT >> -- >> 2.33.0.rc1.237.g0d66db33f3-goog >> > >Please provide comparison of ld.so with and without the change, in >terms of code size, code sequence as well as dynamic relocation. > >Thanks. Neither form has dynamic relocations. If both elf_machine_{load_address,dynamic} are emitted, the new form is two bytes longer but has fewer GOT loads (old form first, new form after the "---"): 0000000000000050 <elf_machine_dynamic>: 50: movq (%rip), %rax # 0x57 <elf_machine_dynamic+0x7> 0000000000000053: R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_-0x4 57: retq 58: nopl (%rax,%rax) 0000000000000060 <elf_machine_load_address>: 60: leaq (%rip), %rax # 0x67 <elf_machine_load_address+0x7> 0000000000000063: R_X86_64_PC32 _DYNAMIC-0x4 67: subq (%rip), %rax # 0x6e <elf_machine_load_address+0xe> 000000000000006a: R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_-0x4 6e: retq --- 0000000000000050 <elf_machine_load_address>: 50: leaq (%rip), %rax # 0x57 <elf_machine_load_address+0x7> 0000000000000053: R_X86_64_PC32 __ehdr_start-0x4 57: retq 58: nopl (%rax,%rax) 0000000000000060 <elf_machine_dynamic>: 60: leaq (%rip), %rdx # 0x67 <elf_machine_dynamic+0x7> 0000000000000063: R_X86_64_PC32 __ehdr_start-0x4 67: leaq (%rip), %rax # 0x6e <elf_machine_dynamic+0xe> 000000000000006a: R_X86_64_PC32 _DYNAMIC-0x4 6e: subq %rdx, %rax 71: retq If inlined, which is what actually happens when GCC generates elf/rtld.os, the new form is 3 bytes shorter (again, old form first, new form after the "---"). The difference is negligible once an align directive is added. 
cf6: shlq $32, %rdx cfa: orq %rdx, %rax cfd: leaq (%rip), %rdx # 0xd04 <_dl_start+0x24> 0000000000000d00: R_X86_64_PC32 _DYNAMIC-0x4 d04: movq %rax, (%rip) # 0xd0b <_dl_start+0x2b> 0000000000000d07: R_X86_64_PC32 .data.rel.ro+0x7c d0b: movq (%rip), %rax # 0xd12 <_dl_start+0x32> 0000000000000d0e: R_X86_64_PC32 _DYNAMIC-0x4 d12: movq %rdx, %r12 d15: subq (%rip), %r12 # 0xd1c <_dl_start+0x3c> 0000000000000d18: R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_-0x4 d1c: movq %rdx, (%rip) # 0xd23 <_dl_start+0x43> 0000000000000d1f: R_X86_64_PC32 _rtld_local+0xa14 d23: movq %r12, (%rip) # 0xd2a <_dl_start+0x4a> 0000000000000d26: R_X86_64_PC32 _rtld_local+0xa04 d2a: testq %rax, %rax d2d: je 0xdd0 <_dl_start+0xf0> d33: movl $1879048191, %edi # imm = 0x6FFFFFFF d38: movl $1879047679, %r10d # imm = 0x6FFFFDFF d3e: movl $1879047935, %ebx # imm = 0x6FFFFEFF d43: movl $1879048001, %r13d # imm = 0x6FFFFF41 d49: leaq (%rip), %rcx # 0xd50 <_dl_start+0x70> 0000000000000d4c: R_X86_64_PC32 _rtld_local+0xa44 d50: movl $1879047733, %r11d # imm = 0x6FFFFE35 d56: movl $50, %r9d d5c: movl $1879048226, %r8d # imm = 0x70000022 d62: jmp 0xd82 <_dl_start+0xa2> --- cf6: leaq (%rip), %r13 # 0xcfd <_dl_start+0x1d> 0000000000000cf9: R_X86_64_PC32 __ehdr_start-0x4 cfd: movq %r13, (%rip) # 0xd04 <_dl_start+0x24> 0000000000000d00: R_X86_64_PC32 _rtld_local+0xa04 d04: shlq $32, %rdx d08: orq %rdx, %rax d0b: leaq (%rip), %rdx # 0xd12 <_dl_start+0x32> 0000000000000d0e: R_X86_64_PC32 _DYNAMIC-0x4 d12: movq %rax, (%rip) # 0xd19 <_dl_start+0x39> 0000000000000d15: R_X86_64_PC32 .data.rel.ro+0x7c d19: movq (%rip), %rax # 0xd20 <_dl_start+0x40> 0000000000000d1c: R_X86_64_PC32 _DYNAMIC-0x4 d20: movq %rdx, (%rip) # 0xd27 <_dl_start+0x47> 0000000000000d23: R_X86_64_PC32 _rtld_local+0xa14 d27: testq %rax, %rax d2a: je 0xdd0 <_dl_start+0xf0> d30: movl $1879048191, %edi # imm = 0x6FFFFFFF d35: movl $1879047679, %r10d # imm = 0x6FFFFDFF d3b: movl $1879047935, %ebx # imm = 0x6FFFFEFF d40: movl $1879048001, %r14d # imm = 
0x6FFFFF41 d46: leaq (%rip), %rcx # 0xd4d <_dl_start+0x6d> 0000000000000d49: R_X86_64_PC32 _rtld_local+0xa44 d4d: movl $1879047733, %r11d # imm = 0x6FFFFE35 d53: movl $50, %r9d d59: movl $1879048226, %r8d # imm = 0x70000022 d5f: jmp 0xd82 <_dl_start+0xa2> I think the main merit is that it drops the reliance on _GLOBAL_OFFSET_TABLE_[0]. (Newer ports can learn from the existing x86-64/aarch64 ports that _GLOBAL_OFFSET_TABLE_[0] doesn't need to do anything special.)
On Tue, Aug 17, 2021 at 5:14 AM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Mon, Aug 16, 2021 at 11:24 PM Fangrui Song via Libc-alpha > <libc-alpha@sourceware.org> wrote: > > > > and drop reliance on _GLOBAL_OFFSET_TABLE_[0]. > > > > &__ehdr_start is a better way to get the load address. > > --- > > sysdeps/x86_64/dl-machine.h | 25 +++++++++---------------- > > 1 file changed, 9 insertions(+), 16 deletions(-) > > > > diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h > > index a8596aa3fa..ceee50734e 100644 > > --- a/sysdeps/x86_64/dl-machine.h > > +++ b/sysdeps/x86_64/dl-machine.h > > @@ -35,27 +35,20 @@ elf_machine_matches_host (const ElfW(Ehdr) *ehdr) > > } > > > > > > -/* Return the link-time address of _DYNAMIC. Conveniently, this is the > > - first element of the GOT. This must be inlined in a function which > > - uses global data. */ > > -static inline ElfW(Addr) __attribute__ ((unused)) > > -elf_machine_dynamic (void) > > -{ > > - /* This produces an IP-relative reloc which is resolved at link time. */ > > - extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden; > > - return _GLOBAL_OFFSET_TABLE_[0]; > > -} > > - > > - > > /* Return the run-time load address of the shared object. */ > > static inline ElfW(Addr) __attribute__ ((unused)) > > elf_machine_load_address (void) > > { > > - /* Compute the difference between the runtime address of _DYNAMIC as seen > > - by an IP-relative reference, and the link-time address found in the > > - special unrelocated first GOT entry. */ > > + extern const ElfW(Ehdr) __ehdr_start attribute_hidden; > > + return (ElfW(Addr)) &__ehdr_start; > > +} > > + > > +/* Return the link-time address of _DYNAMIC. 
*/ > > +static inline ElfW(Addr) __attribute__ ((unused)) > > +elf_machine_dynamic (void) > > +{ > > extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; > > - return (ElfW(Addr)) &_DYNAMIC - elf_machine_dynamic (); > > + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); > > } > > > > /* Set up the loaded object described by L so its unrelocated PLT > > -- > > 2.33.0.rc1.237.g0d66db33f3-goog > > > > Please provide comparison of ld.so with and without the change, in > terms of code size, code sequence as well as dynamic relocation. LGTM. Reviewed-by: H.J. Lu <hjl.tools@gmail.com> Thanks.
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index a8596aa3fa..ceee50734e 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -35,27 +35,20 @@ elf_machine_matches_host (const ElfW(Ehdr) *ehdr) } -/* Return the link-time address of _DYNAMIC. Conveniently, this is the - first element of the GOT. This must be inlined in a function which - uses global data. */ -static inline ElfW(Addr) __attribute__ ((unused)) -elf_machine_dynamic (void) -{ - /* This produces an IP-relative reloc which is resolved at link time. */ - extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden; - return _GLOBAL_OFFSET_TABLE_[0]; -} - - /* Return the run-time load address of the shared object. */ static inline ElfW(Addr) __attribute__ ((unused)) elf_machine_load_address (void) { - /* Compute the difference between the runtime address of _DYNAMIC as seen - by an IP-relative reference, and the link-time address found in the - special unrelocated first GOT entry. */ + extern const ElfW(Ehdr) __ehdr_start attribute_hidden; + return (ElfW(Addr)) &__ehdr_start; +} + +/* Return the link-time address of _DYNAMIC. */ +static inline ElfW(Addr) __attribute__ ((unused)) +elf_machine_dynamic (void) +{ extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; - return (ElfW(Addr)) &_DYNAMIC - elf_machine_dynamic (); + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); } /* Set up the loaded object described by L so its unrelocated PLT