Message ID | 20160609003757.GA19143@intel.com |
---|---|
State | New |
Headers | show |
On 06/08/2016 08:37 PM, H.J. Lu wrote:
> Since __libc_start_main in libc.so is called very early, lazy binding
> isn't relevant. Always call __libc_start_main with indirect branch via
> GOT to avoid extra branch to PLT slot. In case of static executable,
> ld in binutils 2.26 or above can convert indirect branch into direct
> branch:
>
> 0000000000400a80 <_start>:
>   400a80: 31 ed                 xor %ebp,%ebp
>   400a82: 49 89 d1              mov %rdx,%r9
>   400a85: 5e                    pop %rsi
>   400a86: 48 89 e2              mov %rsp,%rdx
>   400a89: 48 83 e4 f0           and $0xfffffffffffffff0,%rsp
>   400a8d: 50                    push %rax
>   400a8e: 54                    push %rsp
>   400a8f: 49 c7 c0 20 1b 40 00  mov $0x401b20,%r8
>   400a96: 48 c7 c1 90 1a 40 00  mov $0x401a90,%rcx
>   400a9d: 48 c7 c7 c0 03 40 00  mov $0x4003c0,%rdi
>   400aa4: 67 e8 96 09 00 00     addr32 callq 401440 <__libc_start_main>
>   400aaa: f4                    hlt
>
> Tested on x86-64. OK for master?

Looks good to me. I saw this particular instance while reviewing
your binutils patches to enable the same optimization.

> H.J.
> ---
>     * sysdeps/x86_64/start.S (_start): Always indirect branch to
>     __libc_start_main via GOT.
> ---
>  sysdeps/x86_64/start.S | 17 ++++++++---------
>  1 file changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/sysdeps/x86_64/start.S b/sysdeps/x86_64/start.S
> index 2369b69..f1b961f 100644
> --- a/sysdeps/x86_64/start.S
> +++ b/sysdeps/x86_64/start.S
> @@ -102,23 +102,22 @@ ENTRY (_start)
>      mov __libc_csu_init@GOTPCREL(%rip), %RCX_LP
>
>      mov main@GOTPCREL(%rip), %RDI_LP
> -
> -    /* Call the user's main function, and exit with its value.
> -       But let the libc call main. Since __libc_start_main is
> -       called very early, lazy binding isn't relevant here. Use
> -       indirect branch via GOT to avoid extra branch to PLT slot. */
> -    call *__libc_start_main@GOTPCREL(%rip)
>  #else
>      /* Pass address of our own entry points to .fini and .init. */
>      mov $__libc_csu_fini, %R8_LP
>      mov $__libc_csu_init, %RCX_LP
>
>      mov $main, %RDI_LP
> +#endif
>
>      /* Call the user's main function, and exit with its value.
> -       But let the libc call main. */
> -    call __libc_start_main
> -#endif
> +       But let the libc call main. Since __libc_start_main in
> +       libc.so is called very early, lazy binding isn't relevant
> +       here. Use indirect branch via GOT to avoid extra branch
> +       to PLT slot. In case of static executable, ld in binutils
> +       2.26 or above can convert indirect branch into direct
> +       branch. */
> +    call *__libc_start_main@GOTPCREL(%rip)
>
>      hlt /* Crash if somehow `exit' does return. */
>  END (_start)
>
diff --git a/sysdeps/x86_64/start.S b/sysdeps/x86_64/start.S
index 2369b69..f1b961f 100644
--- a/sysdeps/x86_64/start.S
+++ b/sysdeps/x86_64/start.S
@@ -102,23 +102,22 @@ ENTRY (_start)
     mov __libc_csu_init@GOTPCREL(%rip), %RCX_LP

     mov main@GOTPCREL(%rip), %RDI_LP
-
-    /* Call the user's main function, and exit with its value.
-       But let the libc call main. Since __libc_start_main is
-       called very early, lazy binding isn't relevant here. Use
-       indirect branch via GOT to avoid extra branch to PLT slot. */
-    call *__libc_start_main@GOTPCREL(%rip)
 #else
     /* Pass address of our own entry points to .fini and .init. */
     mov $__libc_csu_fini, %R8_LP
     mov $__libc_csu_init, %RCX_LP

     mov $main, %RDI_LP
+#endif

     /* Call the user's main function, and exit with its value.
-       But let the libc call main. */
-    call __libc_start_main
-#endif
+       But let the libc call main. Since __libc_start_main in
+       libc.so is called very early, lazy binding isn't relevant
+       here. Use indirect branch via GOT to avoid extra branch
+       to PLT slot. In case of static executable, ld in binutils
+       2.26 or above can convert indirect branch into direct
+       branch. */
+    call *__libc_start_main@GOTPCREL(%rip)

     hlt /* Crash if somehow `exit' does return. */
 END (_start)