diff mbox

Add __builtin_stack_top to x86 backend

Message ID 20150730184107.GA4325@intel.com
State New
Headers show

Commit Message

H.J. Lu July 30, 2015, 6:41 p.m. UTC
On Tue, Jul 21, 2015 at 02:45:39PM -0700, H.J. Lu wrote:
> When __builtin_frame_address is used to retrieve the address of the
> function stack frame, the frame pointer is always kept, which wastes one
> register and 2 instructions.  For x86-32, one less register means
> significant negative impact on performance.  This patch adds a new
> builtin function, __builtin_ia32_stack_top, to x86 backend.  It
> returns the stack address when the function is called.
> 
> Any comments, feedbacks?
> 

Although this function is generic, but implementation is target
specific.  I submitted a generic patch:

https://gcc.gnu.org/ml/gcc-patches/2015-07/msg01859.html

So far there are no interests from other backends.  Here is a patch
to implement __builtin_stack_top in x86 backend.  We can update x86
backedn after it is added to middle-end.  OK for trunk?

Thanks.


H.J.
--
gcc/

	PR target/66960
	* config/i386/i386.c (ix86_expand_prologue): Sorry if DRAP is
	used and the stack address has been taken.
	(ix86_builtins): Add IX86_BUILTIN_STACK_TOP.
	(ix86_init_mmx_sse_builtins): Add __builtin_stack_top.
	(ix86_expand_builtin): Handle IX86_BUILTIN_STACK_TOP.
	* config/i386/i386.h (machine_function): Add stack_top_taken.
	* doc/extend.texi: Document __builtin_stack_top.

gcc/testsuite/

	PR target/66960
	* gcc.target/i386/pr66960-1.c: New test.
	* gcc.target/i386/pr66960-2.c: Likewise.
	* gcc.target/i386/pr66960-3.c: Likewise.
	* gcc.target/i386/pr66960-4.c: Likewise.
	* gcc.target/i386/pr66960-5.c: Likewise.
---
 gcc/config/i386/i386.c                    | 28 ++++++++++++++++++++++++++
 gcc/config/i386/i386.h                    |  4 ++++
 gcc/doc/extend.texi                       |  6 ++++++
 gcc/testsuite/gcc.target/i386/pr66960-1.c | 33 +++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr66960-2.c | 33 +++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr66960-3.c | 17 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr66960-4.c | 21 ++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr66960-5.c | 21 ++++++++++++++++++++
 8 files changed, 163 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66960-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66960-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66960-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66960-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr66960-5.c

Comments

Uros Bizjak Aug. 3, 2015, 8:31 a.m. UTC | #1
On Thu, Jul 30, 2015 at 8:41 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> On Tue, Jul 21, 2015 at 02:45:39PM -0700, H.J. Lu wrote:
>> When __builtin_frame_address is used to retrieve the address of the
>> function stack frame, the frame pointer is always kept, which wastes one
>> register and 2 instructions.  For x86-32, one less register means
>> significant negative impact on performance.  This patch adds a new
>> builtin function, __builtin_ia32_stack_top, to x86 backend.  It
>> returns the stack address when the function is called.
>>
>> Any comments, feedbacks?
>>
>
> Although this function is generic, but implementation is target
> specific.  I submitted a generic patch:
>
> https://gcc.gnu.org/ml/gcc-patches/2015-07/msg01859.html
>
> So far there are no interests from other backends.  Here is a patch
> to implement __builtin_stack_top in x86 backend.  We can update x86
> backedn after it is added to middle-end.  OK for trunk?

I think that the discussion about generic implementation should come
to some conclusion first. From the discussion, here was no resolution
on which way to go.

Uros.
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ede8ea0..ef7ba6d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -11575,6 +11575,12 @@  ix86_expand_prologue (void)
     {
       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
 
+      /* Can't use DRAP if the stack address has been taken.  */
+      if (cfun->machine->stack_top_taken)
+	sorry ("%<__builtin_stack_top%> not supported with stack"
+	       " realignment.  This may be worked around by adding"
+	       " -maccumulate-outgoing-arg.");
+
       /* Only need to push parameter pointer reg if it is caller saved.  */
       if (!call_used_regs[REGNO (crtl->drap_reg)])
 	{
@@ -30741,6 +30747,9 @@  enum ix86_builtins
   IX86_BUILTIN_READ_FLAGS,
   IX86_BUILTIN_WRITE_FLAGS,
 
+  /* Get the stack address when the function is called.  */
+  IX86_BUILTIN_STACK_TOP,
+
   IX86_BUILTIN_MAX
 };
 
@@ -34353,6 +34362,10 @@  ix86_init_mmx_sse_builtins (void)
   def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
 	       VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
 
+  /* Get the stack address when the function is called.  */
+  def_builtin (0, "__builtin_stack_top",
+	       PVOID_FTYPE_VOID, IX86_BUILTIN_STACK_TOP);
+
   /* Add FMA4 multi-arg argument instructions */
   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
     {
@@ -40291,6 +40304,21 @@  addcarryx:
       emit_insn (gen_xabort (op0));
       return 0;
 
+    case IX86_BUILTIN_STACK_TOP:
+      cfun->machine->stack_top_taken = true;
+
+      if (!target
+	  || GET_MODE (target) != Pmode
+	  || !register_operand (target, Pmode))
+	target = gen_reg_rtx (Pmode);
+
+      /* After the prologue, stack top is at -WORD(AP) in the current
+	 frame.  */
+      emit_insn (gen_rtx_SET (target,
+			      plus_constant (Pmode, arg_pointer_rtx,
+					     -UNITS_PER_WORD)));
+      return target;
+
     default:
       break;
     }
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7bd23ec..3781a22 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2492,6 +2492,10 @@  struct GTY(()) machine_function {
   /* If true, it is safe to not save/restore DRAP register.  */
   BOOL_BITFIELD no_drap_save_restore : 1;
 
+  /* If true, the stack address of the current function has been
+     taken.  */
+  BOOL_BITFIELD stack_top_taken : 1;
+
   /* If true, there is register available for argument passing.  This
      is used only in ix86_function_ok_for_sibcall by 32-bit to determine
      if there is scratch register available for indirect sibcall.  In
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index b18d8fb..971b584 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -16661,6 +16661,12 @@  The following built-in function is always available.
 @item void __builtin_ia32_pause (void)
 Generates the @code{pause} machine instruction with a compiler memory
 barrier.
+
+@item void *__builtin_stack_top (void)
+This function is similar to calling @code{__builtin_frame_address}
+with a value of @code{0}, but it returns the stack address when the
+function is called.  Unlike @code{__builtin_frame_address}, the frame
+pointer register isn't required.
 @end table
 
 The following floating-point built-in functions are made available in the
diff --git a/gcc/testsuite/gcc.target/i386/pr66960-1.c b/gcc/testsuite/gcc.target/i386/pr66960-1.c
new file mode 100644
index 0000000..aaab3cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66960-1.c
@@ -0,0 +1,33 @@ 
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fomit-frame-pointer" { target { lp64 } } } */
+/* { dg-options "-O2 -fomit-frame-pointer -maddress-mode=short" { target { x32 } } } */
+/* { dg-options "-O2 -fomit-frame-pointer -miamcu" { target { ia32 } } } */
+
+extern char **environ;
+extern void exit (int status);
+extern int main (long argc, char **argv, char **envp);
+
+void
+_start (void)
+{
+  void *argc_p = __builtin_stack_top ();
+  char **argv = (char **) (argc_p + sizeof (void *));
+  long argc = *(long *) argc_p;
+  int status;
+
+  environ = argv + argc + 1;
+
+  status = main (argc, argv, environ);
+
+  exit (status);
+}
+
+/* { dg-final { scan-assembler "movq\[ \t\]8\\(%rsp\\), %rdi" { target lp64 } } } */
+/* { dg-final { scan-assembler "leaq\[ \t\]16\\(%rsp\\), %rsi" { target lp64 } } } */
+/* { dg-final { scan-assembler "leaq\[ \t\]24\\(%rsp,%rdi,8\\), %rdx" { target lp64 } } } */
+/* { dg-final { scan-assembler "movl\[ \t\]8\\(%esp\\), %edi" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]12\\(%rsp\\), %esi" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]4\\(%rsi,%rdi,4\\), %edx" { target x32 } } } */
+/* { dg-final { scan-assembler "movl\[ \t\]\\(%esp\\), %eax" { target ia32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]4\\(%esp\\), %edx" { target ia32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]8\\(%esp,%eax,4\\), %ecx" { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr66960-2.c b/gcc/testsuite/gcc.target/i386/pr66960-2.c
new file mode 100644
index 0000000..b9dbde2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66960-2.c
@@ -0,0 +1,33 @@ 
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -fno-omit-frame-pointer" { target { lp64 } } } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -maddress-mode=short" { target { x32 } } } */
+/* { dg-options "-O2 -fno-omit-frame-pointer -miamcu" { target { ia32 } } } */
+
+extern char **environ;
+extern void exit (int status);
+extern int main (long argc, char **argv, char **envp);
+
+void
+_start (void)
+{
+  void *argc_p = __builtin_stack_top ();
+  char **argv = (char **) (argc_p + sizeof (void *));
+  long argc = *(long *) argc_p;
+  int status;
+
+  environ = argv + argc + 1;
+
+  status = main (argc, argv, environ);
+
+  exit (status);
+}
+
+/* { dg-final { scan-assembler "movq\[ \t\]8\\(%rbp\\), %rdi" { target lp64 } } } */
+/* { dg-final { scan-assembler "leaq\[ \t\]16\\(%rbp\\), %rsi" { target lp64 } } } */
+/* { dg-final { scan-assembler "leaq\[ \t\]24\\(%rbp,%rdi,8\\), %rdx" { target lp64 } } } */
+/* { dg-final { scan-assembler "movl\[ \t\]8\\(%ebp\\), %edi" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]12\\(%rbp\\), %esi" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]4\\(%rsi,%rdi,4\\), %edx" { target x32 } } } */
+/* { dg-final { scan-assembler "movl\[ \t\]4\\(%ebp\\), %eax" { target ia32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]8\\(%ebp\\), %edx" { target ia32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]12\\(%ebp,%eax,4\\), %ecx" { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr66960-3.c b/gcc/testsuite/gcc.target/i386/pr66960-3.c
new file mode 100644
index 0000000..48cf25e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66960-3.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -mno-accumulate-outgoing-args" { target { lp64 } } } */
+/* { dg-options "-O2 -mno-accumulate-outgoing-args -maddress-mode=short" { target { x32 } } } */
+/* { dg-options "-O2 -mno-accumulate-outgoing-args -miamcu" { target { ia32 } } } */
+
+extern void abort (void);
+extern int check_int (int *i, int align);
+typedef int aligned __attribute__((aligned(64)));
+
+void *
+foo (void)
+{
+  aligned j;
+  if (check_int (&j, __alignof__(j)) != j)
+    abort ();
+  return __builtin_stack_top ();
+} /* { dg-message "sorry, unimplemented: .__builtin_stack_top. not supported" } */
diff --git a/gcc/testsuite/gcc.target/i386/pr66960-4.c b/gcc/testsuite/gcc.target/i386/pr66960-4.c
new file mode 100644
index 0000000..44c0b26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66960-4.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile { target *-*-linux* } } */
+/* { dg-options "-O2 -maccumulate-outgoing-args" { target { lp64 } } } */
+/* { dg-options "-O2 -maccumulate-outgoing-args -maddress-mode=short" { target { x32 } } } */
+/* { dg-options "-O2 -maccumulate-outgoing-args -miamcu" { target { ia32 } } } */
+
+extern void abort (void);
+extern int check_int (int *i, int align);
+typedef int aligned __attribute__((aligned(64)));
+
+void *
+foo (void)
+{
+  aligned j;
+  if (check_int (&j, __alignof__(j)) != j)
+    abort ();
+  return __builtin_stack_top ();
+}
+
+/* { dg-final { scan-assembler "leaq\[ \t\]8\\(%rbp\\), %rax" { target lp64 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]8\\(%rbp\\), %eax" { target x32 } } } */
+/* { dg-final { scan-assembler "leal\[ \t\]4\\(%ebp\\), %eax" { target ia32 } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr66960-5.c b/gcc/testsuite/gcc.target/i386/pr66960-5.c
new file mode 100644
index 0000000..d449437
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66960-5.c
@@ -0,0 +1,21 @@ 
+/* { dg-do link } */
+/* { dg-options "-O" } */
+
+extern void link_error (void);
+
+__attribute__ ((noinline, noclone))
+void
+foo (void)
+{
+  void **p = __builtin_stack_top ();
+  void *ra = __builtin_return_address (0);
+  if (*p != ra)
+    link_error ();
+}
+
+int
+main (void)
+{
+  foo ();
+  return 0;
+}