Patchwork PATCH: Properly generate X32 IE sequence

login
register
mail settings
Submitter Uros Bizjak
Date March 18, 2012, 8:55 p.m.
Message ID <CAFULd4ZsW4ihcgqE8nnZTd75Z51uWe=pQLHqVMvKyhTshDANHw@mail.gmail.com>
Download mbox | patch
Permalink /patch/147426/
State New
Headers show

Comments

Uros Bizjak - March 18, 2012, 8:55 p.m.
On Sun, Mar 18, 2012 at 5:01 PM, Uros Bizjak <ubizjak@gmail.com> wrote:

>> I am testing this patch.  OK for trunk if it passes all tests?
>
> No, force_reg will generate a pseudo, so this conversion is valid only
> for !can_create_pseudo ().
>
> At least for *tls_initial_exec_x32_store, you will need a temporary to
> split the pattern after reload.

Please try the attached patch. It simply throws away all recent
complications w.r.t. the thread pointer and always handles TP in
DImode.

The testcase:

--cut here--
__thread int foo __attribute__ ((tls_model ("initial-exec")));

void bar (int x)
{
  foo = x;
}

int baz (void)
{
  return foo;
}
--cut here--

Now compiles to:

bar:
        movq    foo@gottpoff(%rip), %rax
        movl    %edi, %fs:(%rax)
        ret

baz:
        movq    foo@gottpoff(%rip), %rax
        movl    %fs:(%rax), %eax
        ret

In effect, this always generates %fs:(%rDI) and emits a REX prefix before
mov/add to satisfy brain-dead linkers.

The patch is bootstrapping now on x86_64-pc-linux-gnu.

Uros.
Eric Botcazou - March 20, 2012, 8:51 a.m.
> The patch is bootstrapping now on x86_64-pc-linux-gnu.

It very likely breaks bootstrap with RTL checking enabled:

/sil.a/gnatmail/gnatmail-x/build-sil/x86-linux/gnat/obj/./gcc/xgcc -B/sil.a/gnatmail/gnatmail-x/build-sil/x86-linux/gnat/obj/./gcc/ -B/usr/gnat/i686-pc-linux-gnu/bin/ -B/usr/gnat/i686-pc-linux-gnu/lib/ -isystem /usr/gnat/i686-pc-linux-gnu/include -isystem /usr/gnat/i686-pc-linux-gnu/sys-include    -g -O2 -O2  -g -O2 -DIN_GCC   -W -Wall -Wwrite-strings -Wcast-qual -Wstrict-prototypes -Wmissing-prototypes -Wold-style-definition  -isystem ./include   -fpic -g -DIN_LIBGCC2 -fbuilding-libgcc -fno-stack-protector   -fpic -I. -I. -I../.././gcc -I../../../src/libgcc -I../../../src/libgcc/. -I../../../src/libgcc/../gcc -I../../../src/libgcc/../include -I../../../src/libgcc/config/libbid -DENABLE_DECIMAL_BID_FORMAT -DHAVE_CC_TLS  -DUSE_TLS -o _popcountsi2.o -MT _popcountsi2.o -MD -MP -MF _popcountsi2.dep -DL_popcountsi2 -c ../../../src/libgcc/libgcc2.c -fvisibility=hidden -DHIDE_EXPORTS
../../../src/libgcc/libgcc2.c: In function '__popcountsi2':
../../../src/libgcc/libgcc2.c:835:1: internal compiler error: RTL check: expected elt 1 type 'i' or 'n', have '0' (rtx mem) in ix86_decompose_address, at config/i386/i386.c:11522
Please submit a full bug report,
with preprocessed source if appropriate.
See <URL:mailto:report@adacore.com> for instructions.
make[3]: *** [_popcountsi2.o] Error 1

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 185505)
+++ i386.md	(working copy)
@@ -12836,28 +12836,6 @@ 
 }
   [(set_attr "type" "multi")])
 
-;; When Pmode == SImode, there may be no REX prefix for ADD.  Avoid
-;; any instructions between MOV and ADD, which may interfere linker
-;; IE->LE optimization, since the last byte of the previous instruction
-;; before ADD may look like a REX prefix.  This also avoids
-;;	movl x@gottpoff(%rip), %reg32
-;;	movl $fs:(%reg32), %reg32
-;; Since address override works only on the (reg32) part in fs:(reg32),
-;; we can't use it as memory operand.
-(define_insn "tls_initial_exec_x32"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI
-	 [(match_operand 1 "tls_symbolic_operand")]
-	 UNSPEC_TLS_IE_X32))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_X32"
-{
-  output_asm_insn
-    ("mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}", operands);
-  return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
-}
-  [(set_attr "type" "multi")])
-
 ;; GNU2 TLS patterns can be split.
 
 (define_expand "tls_dynamic_gnu2_32"
Index: i386.c
===================================================================
--- i386.c	(revision 185504)
+++ i386.c	(working copy)
@@ -11509,6 +11509,10 @@  ix86_decompose_address (rtx addr, struct ix86_addr
 	      scale = 1 << scale;
 	      break;
 
+	    case ZERO_EXTEND:
+	      op = XEXP (op, 0);
+	      /* FALLTHRU */
+
 	    case UNSPEC:
 	      if (XINT (op, 1) == UNSPEC_TP
 	          && TARGET_TLS_DIRECT_SEG_REFS
@@ -12478,15 +12482,15 @@  legitimize_pic_address (rtx orig, rtx reg)
 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
 
 static rtx
-get_thread_pointer (bool to_reg)
+get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
 {
   rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
 
-  if (GET_MODE (tp) != Pmode)
-    tp = convert_to_mode (Pmode, tp, 1);
+  if (GET_MODE (tp) != tp_mode)
+    tp = convert_to_mode (tp_mode, tp, 1);
 
   if (to_reg)
-    tp = copy_addr_to_reg (tp);
+    tp = copy_to_mode_reg (tp_mode, tp);
 
   return tp;
 }
@@ -12538,6 +12542,7 @@  legitimize_tls_address (rtx x, enum tls_model mode
 {
   rtx dest, base, off;
   rtx pic = NULL_RTX, tp = NULL_RTX;
+  enum machine_mode tp_mode = Pmode;
   int type;
 
   switch (model)
@@ -12563,7 +12568,7 @@  legitimize_tls_address (rtx x, enum tls_model mode
 	  else
 	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
 
-	  tp = get_thread_pointer (true);
+	  tp = get_thread_pointer (Pmode, true);
 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
 
 	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
@@ -12613,7 +12618,7 @@  legitimize_tls_address (rtx x, enum tls_model mode
 	  else
 	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
 
-	  tp = get_thread_pointer (true);
+	  tp = get_thread_pointer (Pmode, true);
 	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
 			       gen_rtx_MINUS (Pmode, tmp, tp));
 	}
@@ -12659,27 +12664,18 @@  legitimize_tls_address (rtx x, enum tls_model mode
     case TLS_MODEL_INITIAL_EXEC:
       if (TARGET_64BIT)
 	{
+	  tp_mode = DImode;
+
 	  if (TARGET_SUN_TLS)
 	    {
 	      /* The Sun linker took the AMD64 TLS spec literally
 		 and can only handle %rax as destination of the
 		 initial executable code sequence.  */
 
-	      dest = gen_reg_rtx (Pmode);
+	      dest = gen_reg_rtx (tp_mode);
 	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
 	      return dest;
 	    }
-	  else if (Pmode == SImode)
-	    {
-	      /* Always generate
-			movl %fs:0, %reg32
-			addl xgottpoff(%rip), %reg32
-		 to support linker IE->LE optimization and avoid
-		 fs:(%reg32) as memory operand.  */
-	      dest = gen_reg_rtx (Pmode);
-	      emit_insn (gen_tls_initial_exec_x32 (dest, x));
-	      return dest;
-	    }
 
 	  pic = NULL;
 	  type = UNSPEC_GOTNTPOFF;
@@ -12703,24 +12699,23 @@  legitimize_tls_address (rtx x, enum tls_model mode
 	  type = UNSPEC_INDNTPOFF;
 	}
 
-      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
-      off = gen_rtx_CONST (Pmode, off);
+      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
+      off = gen_rtx_CONST (tp_mode, off);
       if (pic)
-	off = gen_rtx_PLUS (Pmode, pic, off);
-      off = gen_const_mem (Pmode, off);
+	off = gen_rtx_PLUS (tp_mode, pic, off);
+      off = gen_const_mem (tp_mode, off);
       set_mem_alias_set (off, ix86_GOT_alias_set ());
 
       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
 	{
-          base = get_thread_pointer (for_mov
-				     || !(TARGET_TLS_DIRECT_SEG_REFS
-					  && TARGET_TLS_INDIRECT_SEG_REFS));
-	  off = force_reg (Pmode, off);
-	  return gen_rtx_PLUS (Pmode, base, off);
+	  base = get_thread_pointer (tp_mode,
+				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+	  off = force_reg (tp_mode, off);
+	  return gen_rtx_PLUS (tp_mode, base, off);
 	}
       else
 	{
-	  base = get_thread_pointer (true);
+	  base = get_thread_pointer (Pmode, true);
 	  dest = gen_reg_rtx (Pmode);
 	  emit_insn (gen_subsi3 (dest, base, off));
 	}
@@ -12734,14 +12729,13 @@  legitimize_tls_address (rtx x, enum tls_model mode
 
       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
 	{
-	  base = get_thread_pointer (for_mov
-				     || !(TARGET_TLS_DIRECT_SEG_REFS
-					  && TARGET_TLS_INDIRECT_SEG_REFS));
+	  base = get_thread_pointer (Pmode,
+				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
 	  return gen_rtx_PLUS (Pmode, base, off);
 	}
       else
 	{
-	  base = get_thread_pointer (true);
+	  base = get_thread_pointer (Pmode, true);
 	  dest = gen_reg_rtx (Pmode);
 	  emit_insn (gen_subsi3 (dest, base, off));
 	}
@@ -13269,8 +13263,7 @@  ix86_delegitimize_tls_address (rtx orig_x)
   rtx x = orig_x, unspec;
   struct ix86_address addr;
 
-  if (!(TARGET_TLS_DIRECT_SEG_REFS
-	&& TARGET_TLS_INDIRECT_SEG_REFS))
+  if (!TARGET_TLS_DIRECT_SEG_REFS)
     return orig_x;
   if (MEM_P (x))
     x = XEXP (x, 0);
Index: i386.h
===================================================================
--- i386.h	(revision 185504)
+++ i386.h	(working copy)
@@ -467,9 +467,6 @@  extern int x86_prefetch_sse;
 #define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0
 #endif
 
-/* Address override works only on the (%reg) part of %fs:(%reg).  */
-#define TARGET_TLS_INDIRECT_SEG_REFS (Pmode == word_mode)
-
 /* Fence to use after loop using storent.  */
 
 extern tree x86_mfence;