From patchwork Mon Mar 19 16:34:53 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: "H.J. Lu" X-Patchwork-Id: 147577 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 420C6B6FC9 for ; Tue, 20 Mar 2012 03:35:32 +1100 (EST) Comment: DKIM? See http://www.dkim.org DKIM-Signature: v=1; a=rsa-sha1; c=relaxed/relaxed; d=gcc.gnu.org; s=default; x=1332779732; h=Comment: DomainKey-Signature:Received:Received:Received:Received: MIME-Version:Received:Received:In-Reply-To:References:Date: Message-ID:Subject:From:To:Cc:Content-Type:Mailing-List: Precedence:List-Id:List-Unsubscribe:List-Archive:List-Post: List-Help:Sender:Delivered-To; bh=9Ixd760CgDa3gntb+qS0dfjEW5g=; b=ATjxhm3oKGes8w+qNk7PI4InHc0ONGAsvtT66ir9DPQQjtNuogSmfqIr0biDwh c9+9v+Tf6HKE6sQeVKOkZAXqBzWpszVcStLWgyfgVpchHiSwyw5DssbWn7xkGX7l iQ6lfvQGE0TTBnrM7Etyoucz99Zq6umMK9mpdSYBG0Evc= Comment: DomainKeys? 
See http://antispam.yahoo.com/domainkeys DomainKey-Signature: a=rsa-sha1; q=dns; c=nofws; s=default; d=gcc.gnu.org; h=Received:Received:X-SWARE-Spam-Status:X-Spam-Check-By:Received:Received:MIME-Version:Received:Received:In-Reply-To:References:Date:Message-ID:Subject:From:To:Cc:Content-Type:X-IsSubscribed:Mailing-List:Precedence:List-Id:List-Unsubscribe:List-Archive:List-Post:List-Help:Sender:Delivered-To; b=yFeQoOfFbvh78w3im+rSNZhYaosh/pvpF4KGAU6nDAsey6m24CFnKrBUpdtMam +ScmRgD3CtIaTZXya5qQGkABtOTxsyzm00LHQnLa8wNBZoZgQt/CKFaehG5IY5Ce CPQdc1m8U4hUXBfS78bH8RTuSAAfU6AZ6yaE4CrLNIkJM=; Received: (qmail 21486 invoked by alias); 19 Mar 2012 16:35:22 -0000 Received: (qmail 20916 invoked by uid 22791); 19 Mar 2012 16:35:19 -0000 X-SWARE-Spam-Status: No, hits=-2.2 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, TW_OV, TW_VZ, TW_ZB, TW_ZJ, TW_ZW X-Spam-Check-By: sourceware.org Received: from mail-gy0-f175.google.com (HELO mail-gy0-f175.google.com) (209.85.160.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Mon, 19 Mar 2012 16:34:54 +0000 Received: by ghbz2 with SMTP id z2so5888727ghb.20 for ; Mon, 19 Mar 2012 09:34:53 -0700 (PDT) MIME-Version: 1.0 Received: by 10.224.27.84 with SMTP id h20mr15847744qac.48.1332174893792; Mon, 19 Mar 2012 09:34:53 -0700 (PDT) Received: by 10.229.89.137 with HTTP; Mon, 19 Mar 2012 09:34:53 -0700 (PDT) In-Reply-To: References: Date: Mon, 19 Mar 2012 09:34:53 -0700 Message-ID: Subject: Re: PATCH: Properly generate X32 IE sequence From: "H.J. Lu" To: Uros Bizjak Cc: gcc-patches@gcc.gnu.org, Richard Henderson X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On Mon, Mar 19, 2012 at 9:19 AM, H.J. Lu wrote: > On Mon, Mar 19, 2012 at 8:54 AM, H.J. 
Lu wrote: >> On Mon, Mar 19, 2012 at 8:51 AM, H.J. Lu wrote: >>> On Sun, Mar 18, 2012 at 1:55 PM, Uros Bizjak wrote: >>>> On Sun, Mar 18, 2012 at 5:01 PM, Uros Bizjak wrote: >>>> >>>>>> I am testing this patch.  OK for trunk if it passes all tests? >>>>> >>>>> No, force_reg will generate a pseudo, so this conversion is valid only >>>>> for !can_create_pseudo (). >>>>> >>>>> At least for *tls_initial_exec_x32_store, you will need a temporary to >>>>> split the pattern after reload. >>> >>> Here is the updated patch to add can_create_pseudo.  I also changed >>> tls_initial_exec_x32 to take an input register operand as thread pointer. >>> >>>> Please try attached patch. It simply throws away all recent >>>> complications w.r.t. to thread pointer and always handles TP in >>>> DImode. >>>> >>>> The testcase: >>>> >>>> --cut here-- >>>> __thread int foo __attribute__ ((tls_model ("initial-exec"))); >>>> >>>> void bar (int x) >>>> { >>>>  foo = x; >>>> } >>>> >>>> int baz (void) >>>> { >>>>  return foo; >>>> } >>>> --cut here-- >>>> >>>> Now compiles to: >>>> >>>> bar: >>>>        movq    foo@gottpoff(%rip), %rax >>>>        movl    %edi, %fs:(%rax) >>>>        ret >>>> >>>> baz: >>>>        movq    foo@gottpoff(%rip), %rax >>>>        movl    %fs:(%rax), %eax >>>>        ret >>>> >>>> In effect, this always generates %fs(%rDI) and emits REX prefix before >>>> mov/add to satisfy brain-dead linkers. >>>> >>>> The patch is bootstrapping now on x86_64-pc-linux-gnu. 
>>>> >>> >>> For >>> >>> -- >>> extern __thread char c; >>> extern char y; >>> void >>> ie (void) >>> { >>>  y = c; >>> } >>> -- >>> >>> Your patch generates: >>> >>>        movl    %fs:0, %eax >>>        movq    c@gottpoff(%rip), %rdx >>>        movzbl  (%rax,%rdx), %edx >>>        movb    %dl, y(%rip) >>>        ret >>> >>> It can be optimized to: >>> >>>        movq    c@gottpoff(%rip), %rax >>>        movzbl  %fs:(%rax), %eax >>>        movb    %al, y(%rip) >>>        ret >>> >> >> Combine failed: >> >> (set (reg:QI 63 [ c ]) >>    (mem/c:QI (plus:DI (zero_extend:DI (unspec:SI [ >>                        (const_int 0 [0]) >>                    ] UNSPEC_TP)) >>            (mem/u/c:DI (const:DI (unspec:DI [ >>                            (symbol_ref:SI ("c") [flags 0x60] >> ) >>                        ] UNSPEC_GOTNTPOFF)) [2 S8 A8])) [0 c+0 S1 A8])) >> >> > > Wrong testcase.  It should be > > -- > extern __thread char c; > extern __thread short w; > extern char y; > extern short i; > void > ie (void) > { >  y = c; >  i = w; > } > --- > > I got > >        movl    %fs:0, %eax >        movq    c@gottpoff(%rip), %rdx >        movzbl  (%rax,%rdx), %edx >        movb    %dl, y(%rip) >        movq    w@gottpoff(%rip), %rdx >        movzwl  (%rax,%rdx), %eax >        movw    %ax, i(%rip) >        ret > > It can be > >        movq    c@gottpoff(%rip), %rax >        movzbl  %fs:(%rax), %eax >        movb    %al, y(%rip) >        movq    w@gottpoff(%rip), %rax >        movzwl  %fs:(%rax), %eax >        movw    %ax, i(%rip) >        ret > > How about this patch? I changed the x32 TP load to (define_insn "*load_tp_x32_<mode>" [(set (match_operand:SWI48x 0 "register_operand" "=r") (unspec:SWI48x [(const_int 0)] UNSPEC_TP))] "TARGET_X32" "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}" [(set_attr "type" "imov") (set_attr "modrm" "0") (set_attr "length" "7") (set_attr "memory" "load") (set_attr "imm_disp" "false")]) and removed *load_tp_x32_zext. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9aa5ee7..66221e4 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12483,15 +12483,12 @@ legitimize_pic_address (rtx orig, rtx reg) /* Load the thread pointer. If TO_REG is true, force it into a register. */ static rtx -get_thread_pointer (bool to_reg) +get_thread_pointer (enum machine_mode tp_mode, bool to_reg) { - rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); - - if (GET_MODE (tp) != Pmode) - tp = convert_to_mode (Pmode, tp, 1); + rtx tp = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); if (to_reg) - tp = copy_addr_to_reg (tp); + tp = copy_to_mode_reg (tp_mode, tp); return tp; } @@ -12543,6 +12540,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) { rtx dest, base, off; rtx pic = NULL_RTX, tp = NULL_RTX; + enum machine_mode tp_mode = Pmode; int type; switch (model) @@ -12568,7 +12566,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) else emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic)); - tp = get_thread_pointer (true); + tp = get_thread_pointer (Pmode, true); dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); set_unique_reg_note (get_last_insn (), REG_EQUAL, x); @@ -12618,7 +12616,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) else emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic)); - tp = get_thread_pointer (true); + tp = get_thread_pointer (Pmode, true); set_unique_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_MINUS (Pmode, tmp, tp)); } @@ -12664,27 +12662,18 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) case TLS_MODEL_INITIAL_EXEC: if (TARGET_64BIT) { + tp_mode = DImode; + if (TARGET_SUN_TLS) { /* The Sun linker took the AMD64 TLS spec literally and can only handle %rax as destination of the initial executable code sequence. 
*/ - dest = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (tp_mode); emit_insn (gen_tls_initial_exec_64_sun (dest, x)); return dest; } - else if (Pmode == SImode) - { - /* Always generate - movl %fs:0, %reg32 - addl xgottpoff(%rip), %reg32 - to support linker IE->LE optimization and avoid - fs:(%reg32) as memory operand. */ - dest = gen_reg_rtx (Pmode); - emit_insn (gen_tls_initial_exec_x32 (dest, x)); - return dest; - } pic = NULL; type = UNSPEC_GOTNTPOFF; @@ -12708,24 +12697,23 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) type = UNSPEC_INDNTPOFF; } - off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); - off = gen_rtx_CONST (Pmode, off); + off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type); + off = gen_rtx_CONST (tp_mode, off); if (pic) - off = gen_rtx_PLUS (Pmode, pic, off); - off = gen_const_mem (Pmode, off); + off = gen_rtx_PLUS (tp_mode, pic, off); + off = gen_const_mem (tp_mode, off); set_mem_alias_set (off, ix86_GOT_alias_set ()); if (TARGET_64BIT || TARGET_ANY_GNU_TLS) { - base = get_thread_pointer (for_mov - || !(TARGET_TLS_DIRECT_SEG_REFS - && TARGET_TLS_INDIRECT_SEG_REFS)); - off = force_reg (Pmode, off); - return gen_rtx_PLUS (Pmode, base, off); + base = get_thread_pointer (tp_mode, + for_mov || !TARGET_TLS_DIRECT_SEG_REFS); + off = force_reg (tp_mode, off); + return gen_rtx_PLUS (tp_mode, base, off); } else { - base = get_thread_pointer (true); + base = get_thread_pointer (Pmode, true); dest = gen_reg_rtx (Pmode); emit_insn (ix86_gen_sub3 (dest, base, off)); } @@ -12739,14 +12727,13 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) if (TARGET_64BIT || TARGET_ANY_GNU_TLS) { - base = get_thread_pointer (for_mov - || !(TARGET_TLS_DIRECT_SEG_REFS - && TARGET_TLS_INDIRECT_SEG_REFS)); + base = get_thread_pointer (Pmode, + for_mov || !TARGET_TLS_DIRECT_SEG_REFS); return gen_rtx_PLUS (Pmode, base, off); } else { - base = get_thread_pointer (true); + base = get_thread_pointer (Pmode, true); dest = gen_reg_rtx 
(Pmode); emit_insn (ix86_gen_sub3 (dest, base, off)); } @@ -13274,8 +13261,7 @@ ix86_delegitimize_tls_address (rtx orig_x) rtx x = orig_x, unspec; struct ix86_address addr; - if (!(TARGET_TLS_DIRECT_SEG_REFS - && TARGET_TLS_INDIRECT_SEG_REFS)) + if (!TARGET_TLS_DIRECT_SEG_REFS) return orig_x; if (MEM_P (x)) x = XEXP (x, 0); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 9e5ac00..3fcd209 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -467,9 +467,6 @@ extern int x86_prefetch_sse; #define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0 #endif -/* Address override works only on the (%reg) part of %fs:(%reg). */ -#define TARGET_TLS_INDIRECT_SEG_REFS (Pmode == word_mode) - /* Fence to use after loop using storent. */ extern tree x86_mfence; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d23c67b..e167ceb 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -12747,20 +12747,9 @@ (define_mode_attr tp_seg [(SI "gs") (DI "fs")]) ;; Load and add the thread base pointer from %:0. -(define_insn "*load_tp_x32" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(const_int 0)] UNSPEC_TP))] - "TARGET_X32" - "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}" - [(set_attr "type" "imov") - (set_attr "modrm" "0") - (set_attr "length" "7") - (set_attr "memory" "load") - (set_attr "imm_disp" "false")]) - -(define_insn "*load_tp_x32_zext" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (unspec:SI [(const_int 0)] UNSPEC_TP)))] +(define_insn "*load_tp_x32_" + [(set (match_operand:SWI48x 0 "register_operand" "=r") + (unspec:SWI48x [(const_int 0)] UNSPEC_TP))] "TARGET_X32" "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}" [(set_attr "type" "imov") @@ -12836,28 +12825,6 @@ } [(set_attr "type" "multi")]) -;; When Pmode == SImode, there may be no REX prefix for ADD. 
Avoid -;; any instructions between MOV and ADD, which may interfere linker -;; IE->LE optimization, since the last byte of the previous instruction -;; before ADD may look like a REX prefix. This also avoids -;; movl x@gottpoff(%rip), %reg32 -;; movl $fs:(%reg32), %reg32 -;; Since address override works only on the (reg32) part in fs:(reg32), -;; we can't use it as memory operand. -(define_insn "tls_initial_exec_x32" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI - [(match_operand 1 "tls_symbolic_operand")] - UNSPEC_TLS_IE_X32)) - (clobber (reg:CC FLAGS_REG))] - "TARGET_X32" -{ - output_asm_insn - ("mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}", operands); - return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}"; -} - [(set_attr "type" "multi")]) - ;; GNU2 TLS patterns can be split. (define_expand "tls_dynamic_gnu2_32"