diff mbox

[RFC,X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

Message ID CAAs8HmyQKibfDXhWQc4tj5V1Nb0pgQfkhqYjcksfRUqsA27OMA@mail.gmail.com
State New
Headers show

Commit Message

Sriraman Tallam May 29, 2015, 9:41 p.m. UTC
Made one more change and New patch attached.

Thanks
Sri

On Fri, May 29, 2015 at 2:37 PM, Sriraman Tallam <tmsriram@google.com> wrote:
> On Fri, May 29, 2015 at 12:35 PM, Jan Hubicka <hubicka@ucw.cz> wrote:
>>>       * config/i386/i386.c (avoid_plt_to_call): New function.
>>>       (ix86_output_call_insn): Generate indirect call for functions
>>>       marked with "noplt" attribute.
>>>       (attribute_spec ix86_attribute_): Define new attribute "noplt".
>>>       * doc/extend.texi: Document new attribute "noplt".
>>>       * gcc.target/i386/noplt-1.c: New testcase.
>>>       * gcc.target/i386/noplt-2.c: New testcase.
>>>
>>> Index: config/i386/i386.c
>>> ===================================================================
>>> --- config/i386/i386.c        (revision 223720)
>>> +++ config/i386/i386.c        (working copy)
>>> @@ -25599,6 +25599,24 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx call
>>>    return call;
>>>  }
>>>
>>> +/* Return true if the function being called was marked with attribute
>>> +   "noplt".  If this function is defined, this should return false.  */
>>> +static bool
>>> +avoid_plt_to_call (rtx call_op)
>>> +{
>>> +  if (SYMBOL_REF_LOCAL_P (call_op))
>>> +    return false;
>>> +
>>> +  tree symbol_decl = SYMBOL_REF_DECL (call_op);
>>> +
>>> +  if (symbol_decl != NULL_TREE
>>> +      && TREE_CODE (symbol_decl) == FUNCTION_DECL
>>> +      && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))
>>> +    return true;
>>> +
>>> +  return false;
>>> +}
>>
>> OK, now we have __attribute__ (optimize("noplt")) which binds to the caller and makes
>> all calls in the function to skip PLT and __attribute__ ("noplt") which binds to callee
>> and makes all calls to function to not use PLT.
>>
>> That sort of makes sense to me, but why "noplt" attribute is not implemented at generic level
>> just like -fplt? Is it only because every target supporting PLT would need update in its
>> call expansion patterns?
>
> Yes, that is what I had in mind.
>
>>
>> Also I think the PLT calls have EBX in call fusage wich is added by ix86_expand_call.
>>   else
>>     {
>>       /* Static functions and indirect calls don't need the pic register.  */
>>       if (flag_pic
>>           && (!TARGET_64BIT
>>               || (ix86_cmodel == CM_LARGE_PIC
>>                   && DEFAULT_ABI != MS_ABI))
>>           && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
>>           && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
>>         {
>>           use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
>>           if (ix86_use_pseudo_pic_reg ())
>>             emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
>>                             pic_offset_table_rtx);
>>         }
>>
>> I think you want to take that away from FUSAGE there just like we do for local calls
>> (and in fact the code should already check flag_pic && flag_plt I suppose.
>
> Done that now and patch attached.
>
> Thanks
> Sri
>
>>
>> Honza
* config/i386/i386.c (avoid_plt_to_call): New function.
	(ix86_expand_call): Dont use the PIC register when external function
	calls are not made via PLT.
	(ix86_output_call_insn): Generate indirect call for functions
	marked with "noplt" attribute.
	(attribute_spec ix86_attribute_): Define new attribute "noplt".
	* doc/extend.texi: Document new attribute "noplt".
	* gcc.target/i386/noplt-1.c: New testcase.
	* gcc.target/i386/noplt-2.c: New testcase.
diff mbox

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 223720)
+++ config/i386/i386.c	(working copy)
@@ -25475,6 +25475,28 @@  construct_plt_address (rtx symbol)
   return tmp;
 }
 
+/* Return true if the function being called was marked with attribute
+   "noplt".  If this function is defined, this should return false.  This
+   is currently used only with 64-bit ELF targets.  */
+static bool
+avoid_plt_to_call (rtx call_op)
+{
+  if (!TARGET_64BIT || TARGET_MACHO|| TARGET_SEH || TARGET_PECOFF)
+    return false;
+
+  if (SYMBOL_REF_LOCAL_P (call_op))
+    return false;
+
+  tree symbol_decl = SYMBOL_REF_DECL (call_op);
+
+  if (symbol_decl != NULL_TREE
+      && TREE_CODE (symbol_decl) == FUNCTION_DECL
+      && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))
+    return true;
+
+  return false;
+}
+
 rtx
 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
 		  rtx callarg2,
@@ -25497,13 +25519,16 @@  ix86_expand_call (rtx retval, rtx fnaddr, rtx call
     }
   else
     {
-      /* Static functions and indirect calls don't need the pic register.  */
+      /* Static functions and indirect calls don't need the pic register.  Also,
+	 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
+	 it an indirect call.  */
       if (flag_pic
 	  && (!TARGET_64BIT
 	      || (ix86_cmodel == CM_LARGE_PIC
 		  && DEFAULT_ABI != MS_ABI))
 	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
-	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
+	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
+	  && flag_plt && !avoid_plt_to_call (XEXP (fnaddr, 0)))
 	{
 	  use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
 	  if (ix86_use_pseudo_pic_reg ())
@@ -25611,7 +25636,12 @@  ix86_output_call_insn (rtx_insn *insn, rtx call_op
   if (SIBLING_CALL_P (insn))
     {
       if (direct_p)
-	xasm = "%!jmp\t%P0";
+	{
+	  if (avoid_plt_to_call (call_op))
+	    xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
+	  else
+	    xasm = "%!jmp\t%P0";
+	}
       /* SEH epilogue detection requires the indirect branch case
 	 to include REX.W.  */
       else if (TARGET_SEH)
@@ -25654,7 +25684,12 @@  ix86_output_call_insn (rtx_insn *insn, rtx call_op
     }
 
   if (direct_p)
-    xasm = "%!call\t%P0";
+    {
+      if (avoid_plt_to_call (call_op))
+        xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
+      else
+        xasm = "%!call\t%P0";
+    }
   else
     xasm = "%!call\t%A0";
 
@@ -46628,6 +46663,9 @@  static const struct attribute_spec ix86_attribute_
     false },
   { "callee_pop_aggregate_return", 1, 1, false, true, true,
     ix86_handle_callee_pop_aggregate_return, true },
+  /* Attribute to avoid calling function via PLT.  */
+  { "noplt", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
+    false },
   /* End element.  */
   { NULL,        0, 0, false, false, false, NULL, false }
 };
Index: doc/extend.texi
===================================================================
--- doc/extend.texi	(revision 223720)
+++ doc/extend.texi	(working copy)
@@ -4858,6 +4858,13 @@  On x86-32 targets, the @code{stdcall} attribute ca
 assume that the called function pops off the stack space used to
 pass arguments, unless it takes a variable number of arguments.
 
+@item noplt
+@cindex @code{noplt} function attribute, x86-64
+@cindex functions whose calls do not go via PLT
+On x86-64 targets. the @code{noplt} attribute causes the compiler to
+call this external function indirectly using a GOT entry and avoid the
+PLT.
+
 @item target (@var{options})
 @cindex @code{target} function attribute
 As discussed in @ref{Common Function Attributes}, this attribute 
Index: testsuite/gcc.target/i386/noplt-1.c
===================================================================
--- testsuite/gcc.target/i386/noplt-1.c	(revision 0)
+++ testsuite/gcc.target/i386/noplt-1.c	(working copy)
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target x86_64-*-linux* } } */
+
+
+__attribute__ ((noplt))
+void foo();
+
+int main()
+{
+  foo();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "call\[ \t\]\\*.*foo.*@GOTPCREL\\(%rip\\)" } } */ 
Index: testsuite/gcc.target/i386/noplt-2.c
===================================================================
--- testsuite/gcc.target/i386/noplt-2.c	(revision 0)
+++ testsuite/gcc.target/i386/noplt-2.c	(working copy)
@@ -0,0 +1,13 @@ 
+/* { dg-do compile { target x86_64-*-linux* } } */
+/* { dg-options "-O2" } */
+
+
+__attribute__ ((noplt))
+int foo();
+
+int main()
+{
+  return foo();
+}
+
+/* { dg-final { scan-assembler "jmp\[ \t\]\\*.*foo.*@GOTPCREL\\(%rip\\)" } } */