Patchwork [GOOGLE,PR58066] preferred_stack_boundary update for tls expanded call

login
register
mail settings
Submitter Wei Mi
Date March 12, 2014, 5:01 p.m.
Message ID <CA+4CFy76xZQZg8ZFfQrcOSh3bd+TxSu3R4u1vA86+LEA823XSw@mail.gmail.com>
Download mbox | patch
Permalink /patch/329568/
State New
Headers show

Comments

Wei Mi - March 12, 2014, 5:01 p.m.
This patch is to fix the problem described here:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066

The original patch is here:
http://gcc.gnu.org/ml/gcc-patches/2014-03/msg00369.html
The attached patch addresses HJ's comment.

Bootstrap and regression tests pass. The perf test in plain mode is also OK.
OK for the google-4_8 branch?

Thanks,
Wei.


gcc/ChangeLog:

2014-03-07  Wei Mi  <wmi@google.com>

        * config/i386/i386.c (ix86_compute_frame_layout): Update
        preferred_stack_boundary when there is a TLS expanded call.
        * config/i386/i386.md: Set
        ix86_tls_descriptor_calls_expanded_in_cfun.

gcc/testsuite/ChangeLog:

2014-03-07  Wei Mi  <wmi@google.com>

        * g++.dg/pr58066.C: New test.

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 208464)
+++ config/i386/i386.c  (working copy)
@@ -9211,6 +9211,19 @@  ix86_compute_frame_layout (struct ix86_f
       crtl->preferred_stack_boundary = 128;
       crtl->stack_alignment_needed = 128;
     }
+  /* For 64-bit targets, preferred_stack_boundary is never updated for a
+     call expanded from a TLS descriptor, so update it here.  We don't
+     update it at expand time because, according to the comment before
+     ix86_current_function_calls_tls_descriptor, TLS calls may be
+     optimized away.  */
+  else if (TARGET_64BIT
+          && ix86_current_function_calls_tls_descriptor
+          && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
+    {
+      crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+      if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
+       crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
+    }

   gcc_assert (!size || stack_alignment_needed);
   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 208464)
+++ config/i386/i386.md (working copy)
@@ -12776,7 +12776,11 @@ 
                     UNSPEC_TLS_GD))
      (clobber (match_scratch:SI 4))
      (clobber (match_scratch:SI 5))
-     (clobber (reg:CC FLAGS_REG))])])
+     (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})

 (define_insn "*tls_global_dynamic_64_<mode>"
   [(set (match_operand:P 0 "register_operand" "=a")
@@ -12809,7 +12813,10 @@ 
           (const_int 0)))
      (unspec:P [(match_operand 1 "tls_symbolic_operand")]
               UNSPEC_TLS_GD)])]
-  "TARGET_64BIT")
+  "TARGET_64BIT"
+{
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})

 (define_insn "*tls_local_dynamic_base_32_gnu"
   [(set (match_operand:SI 0 "register_operand" "=a")
@@ -12844,7 +12851,11 @@ 
            UNSPEC_TLS_LD_BASE))
       (clobber (match_scratch:SI 3))
       (clobber (match_scratch:SI 4))
-      (clobber (reg:CC FLAGS_REG))])])
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})

 (define_insn "*tls_local_dynamic_base_64_<mode>"
   [(set (match_operand:P 0 "register_operand" "=a")
@@ -12870,7 +12881,10 @@ 
            (mem:QI (match_operand 1 "constant_call_address_operand"))
            (const_int 0)))
       (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
-  "TARGET_64BIT")
+  "TARGET_64BIT"
+{
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})

 ;; Local dynamic of a single variable is a lose.  Show combine how
 ;; to convert that back to global dynamic.
Index: testsuite/g++.dg/pr58066.C
===================================================================
--- testsuite/g++.dg/pr58066.C  (revision 0)
+++ testsuite/g++.dg/pr58066.C  (revision 0)
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && { ! ia32 } } } } */
+/* { dg-options "-fPIC -O2" } */
+
+/* Check whether the stack frame starting address of the TLS expanded call
+   in __cxa_get_globals() is 16-byte aligned.  */
+static __thread char ccc;
+extern "C" void* __cxa_get_globals() throw()
+{
+ return &ccc;
+}
+
+/* { dg-final { scan-assembler ".cfi_def_cfa_offset 16" } } */