diff mbox

[02/10] addr32: Output REX prefix for UNSPEC_GOTNTPOFF

Message ID 20120302203614.GA2179@intel.com
State New
Headers show

Commit Message

H.J. Lu March 2, 2012, 8:36 p.m. UTC
X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
by checking

	movq foo@gottpoff(%rip), %reg

and

	addq foo@gottpoff(%rip), %reg

It uses the REX prefix to avoid the last byte of the previous
instruction.  With 32bit Pmode, we may not have the REX prefix and
the last byte of the previous instruction may be an offset, which
may look like a REX prefix.  IE->LE optimization will generate corrupted
binary.  This patch makes sure we always output an REX pfrefix for
UNSPEC_GOTNTPOFF.  OK for trunk?

Thanks.

H.J.

2012-03-02  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386-protos.h (ix86_output_rex_prefix_p): New.
	* config/i386/i386.c (ix86_output_rex_prefix_p): Likewise.

	* config/i386/i386.md (*movsi_internal): Output REX prefix if
	needed.
	(*add<mode>_1): Likewise.

Comments

Uros Bizjak March 4, 2012, 10:12 p.m. UTC | #1
On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:

> X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
> by checking
>
>        movq foo@gottpoff(%rip), %reg
>
> and
>
>        addq foo@gottpoff(%rip), %reg
>
> It uses the REX prefix to avoid the last byte of the previous
> instruction.  With 32bit Pmode, we may not have the REX prefix and
> the last byte of the previous instruction may be an offset, which
> may look like a REX prefix.  IE->LE optimization will generate corrupted
> binary.  This patch makes sure we always output an REX pfrefix for
> UNSPEC_GOTNTPOFF.  OK for trunk?

No, please implement this using UNSPEC in the same way as
tls_initial_exec_64_sun implements Sun linker quirk.

Uros.
H.J. Lu March 4, 2012, 10:38 p.m. UTC | #2
On Sun, Mar 4, 2012 at 2:12 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>
>> X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
>> by checking
>>
>>        movq foo@gottpoff(%rip), %reg
>>
>> and
>>
>>        addq foo@gottpoff(%rip), %reg
>>
>> It uses the REX prefix to avoid the last byte of the previous
>> instruction.  With 32bit Pmode, we may not have the REX prefix and
>> the last byte of the previous instruction may be an offset, which
>> may look like a REX prefix.  IE->LE optimization will generate corrupted
>> binary.  This patch makes sure we always output an REX pfrefix for
>> UNSPEC_GOTNTPOFF.  OK for trunk?
>
> No, please implement this using UNSPEC in the same way as
> tls_initial_exec_64_sun implements Sun linker quirk.
>

I am not sure how it can be done with UNSPEC cleanly.
Unlike the Sun linker issue, this is an instruction encoding
issue.  At instruction pattern level, there is no difference
between x32 and x86-64. I need to make sure that there is
always one and only one REX prefix with UNSPEC_GOTNTPOFF.
If REG is r8-r15, we shouldn't add another REX prefix.
Uros Bizjak March 4, 2012, 10:52 p.m. UTC | #3
On Sun, Mar 4, 2012 at 11:38 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sun, Mar 4, 2012 at 2:12 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>
>>> X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
>>> by checking
>>>
>>>        movq foo@gottpoff(%rip), %reg
>>>
>>> and
>>>
>>>        addq foo@gottpoff(%rip), %reg
>>>
>>> It uses the REX prefix to avoid the last byte of the previous
>>> instruction.  With 32bit Pmode, we may not have the REX prefix and
>>> the last byte of the previous instruction may be an offset, which
>>> may look like a REX prefix.  IE->LE optimization will generate corrupted
>>> binary.  This patch makes sure we always output an REX pfrefix for
>>> UNSPEC_GOTNTPOFF.  OK for trunk?
>>
>> No, please implement this using UNSPEC in the same way as
>> tls_initial_exec_64_sun implements Sun linker quirk.
>>
>
> I am not sure how it can be done with UNSPEC cleanly.
> Unlike the Sun linker issue, this is an instruction encoding
> issue.  At instruction pattern level, there is no difference
> between x32 and x86-64. I need to make sure that there is
> always one and only one REX prefix with UNSPEC_GOTNTPOFF.
> If REG is r8-r15, we shouldn't add another REX prefix.

(define_insn "tls_initial_exec_x32"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "tls_symbolic_operand" "")]
	 UNSPEC_TLS_IE_X32))
   (clobber (reg:CC FLAGS_REG))]
  "TARGET_X32"
{
  if (!REX_INT_REG_P (operands[0])
    fputs (\trex\n", asm_out_file);
  output_asm_insn
    ("mov{l}\t{%%fs:0, %0|%0, QWORD PTR fs:0}", operands);
  if (!REX_INT_REG_P (operands[0])
    fputs (\trex\n", asm_out_file);
  return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
}
  [(set_attr "type" "multi")])

rex or rex64 prefix, whatever you wish.

Then generate this pattern from legitimize_tls_address,
<TLS_MODEL_INITIAL_EXEC>, see TARGET_SUN_TLS part.

Uros.
H.J. Lu March 5, 2012, 4:06 a.m. UTC | #4
On Sun, Mar 4, 2012 at 2:52 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sun, Mar 4, 2012 at 11:38 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Sun, Mar 4, 2012 at 2:12 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>
>>>> X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
>>>> by checking
>>>>
>>>>        movq foo@gottpoff(%rip), %reg
>>>>
>>>> and
>>>>
>>>>        addq foo@gottpoff(%rip), %reg
>>>>
>>>> It uses the REX prefix to avoid the last byte of the previous
>>>> instruction.  With 32bit Pmode, we may not have the REX prefix and
>>>> the last byte of the previous instruction may be an offset, which
>>>> may look like a REX prefix.  IE->LE optimization will generate corrupted
>>>> binary.  This patch makes sure we always output an REX pfrefix for
>>>> UNSPEC_GOTNTPOFF.  OK for trunk?
>>>
>>> No, please implement this using UNSPEC in the same way as
>>> tls_initial_exec_64_sun implements Sun linker quirk.
>>>
>>
>> I am not sure how it can be done with UNSPEC cleanly.
>> Unlike the Sun linker issue, this is an instruction encoding
>> issue.  At instruction pattern level, there is no difference
>> between x32 and x86-64. I need to make sure that there is
>> always one and only one REX prefix with UNSPEC_GOTNTPOFF.
>> If REG is r8-r15, we shouldn't add another REX prefix.
>
> (define_insn "tls_initial_exec_x32"
>  [(set (match_operand:SI 0 "register_operand" "=r")
>        (unspec:SI
>         [(match_operand:SI 1 "tls_symbolic_operand" "")]
>         UNSPEC_TLS_IE_X32))
>   (clobber (reg:CC FLAGS_REG))]
>  "TARGET_X32"
> {
>  if (!REX_INT_REG_P (operands[0])
>    fputs (\trex\n", asm_out_file);
>  output_asm_insn
>    ("mov{l}\t{%%fs:0, %0|%0, QWORD PTR fs:0}", operands);
>  if (!REX_INT_REG_P (operands[0])
>    fputs (\trex\n", asm_out_file);
>  return "add{l}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}";
> }
>  [(set_attr "type" "multi")])
>
> rex or rex64 prefix, whatever you wish.
>

Will it introduce bugs? The normal code look like

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (Pmode, off);
      if (pic)
        off = gen_rtx_PLUS (Pmode, pic, off);
      off = gen_const_mem (Pmode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (Pmode, off);
          return gen_rtx_PLUS (Pmode, base, off);
        }

There is a call to

 set_mem_alias_set (off, ix86_GOT_alias_set ());
Uros Bizjak March 5, 2012, 3:31 p.m. UTC | #5
On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:

> X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
> by checking
>
>        movq foo@gottpoff(%rip), %reg
>
> and
>
>        addq foo@gottpoff(%rip), %reg
>
> It uses the REX prefix to avoid the last byte of the previous
> instruction.  With 32bit Pmode, we may not have the REX prefix and
> the last byte of the previous instruction may be an offset, which
> may look like a REX prefix.  IE->LE optimization will generate corrupted
> binary.  This patch makes sure we always output an REX pfrefix for
> UNSPEC_GOTNTPOFF.  OK for trunk?

Actually, linker has:

    case R_X86_64_GOTTPOFF:
      /* Check transition from IE access model:
		mov foo@gottpoff(%rip), %reg
		add foo@gottpoff(%rip), %reg
       */

      /* Check REX prefix first.  */
      if (offset >= 3 && (offset + 4) <= sec->size)
	{
	  val = bfd_get_8 (abfd, contents + offset - 3);
	  if (val != 0x48 && val != 0x4c)
	    {
	      /* X32 may have 0x44 REX prefix or no REX prefix.  */
	      if (ABI_64_P (abfd))
		return FALSE;
	    }
	}
      else
	{
	  /* X32 may not have any REX prefix.  */
	  if (ABI_64_P (abfd))
	    return FALSE;
	  if (offset < 2 || (offset + 3) > sec->size)
	    return FALSE;
	}

So, it should handle the case without REX just OK. If it doesn't, then
this is a bug in binutils.

Uros.
H.J. Lu March 5, 2012, 5:03 p.m. UTC | #6
On Mon, Mar 5, 2012 at 7:31 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Fri, Mar 2, 2012 at 9:36 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>
>> X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
>> by checking
>>
>>        movq foo@gottpoff(%rip), %reg
>>
>> and
>>
>>        addq foo@gottpoff(%rip), %reg
>>
>> It uses the REX prefix to avoid the last byte of the previous
>> instruction.  With 32bit Pmode, we may not have the REX prefix and
>> the last byte of the previous instruction may be an offset, which
>> may look like a REX prefix.  IE->LE optimization will generate corrupted
>> binary.  This patch makes sure we always output an REX pfrefix for
>> UNSPEC_GOTNTPOFF.  OK for trunk?
>
> Actually, linker has:
>
>    case R_X86_64_GOTTPOFF:
>      /* Check transition from IE access model:
>                mov foo@gottpoff(%rip), %reg
>                add foo@gottpoff(%rip), %reg
>       */
>
>      /* Check REX prefix first.  */
>      if (offset >= 3 && (offset + 4) <= sec->size)
>        {
>          val = bfd_get_8 (abfd, contents + offset - 3);
>          if (val != 0x48 && val != 0x4c)
>            {
>              /* X32 may have 0x44 REX prefix or no REX prefix.  */
>              if (ABI_64_P (abfd))
>                return FALSE;
>            }
>        }
>      else
>        {
>          /* X32 may not have any REX prefix.  */
>          if (ABI_64_P (abfd))
>            return FALSE;
>          if (offset < 2 || (offset + 3) > sec->size)
>            return FALSE;
>        }
>
> So, it should handle the case without REX just OK. If it doesn't, then
> this is a bug in binutils.
>

The last byte of the displacement in the previous instruction
may happen to look like a REX byte. In that case, linker
will overwrite the last byte of the previous instruction and
generate the wrong instruction sequence.

I need to update linker to enforce the REX byte check.
Uros Bizjak March 5, 2012, 5:25 p.m. UTC | #7
On Mon, Mar 5, 2012 at 6:03 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>> X86-64 linker optimizes TLS_MODEL_INITIAL_EXEC to TLS_MODEL_LOCAL_EXEC
>>> by checking
>>>
>>>        movq foo@gottpoff(%rip), %reg
>>>
>>> and
>>>
>>>        addq foo@gottpoff(%rip), %reg
>>>
>>> It uses the REX prefix to avoid the last byte of the previous
>>> instruction.  With 32bit Pmode, we may not have the REX prefix and
>>> the last byte of the previous instruction may be an offset, which
>>> may look like a REX prefix.  IE->LE optimization will generate corrupted
>>> binary.  This patch makes sure we always output an REX pfrefix for
>>> UNSPEC_GOTNTPOFF.  OK for trunk?
>>
>> Actually, linker has:
>>
>>    case R_X86_64_GOTTPOFF:
>>      /* Check transition from IE access model:
>>                mov foo@gottpoff(%rip), %reg
>>                add foo@gottpoff(%rip), %reg
>>       */
>>
>>      /* Check REX prefix first.  */
>>      if (offset >= 3 && (offset + 4) <= sec->size)
>>        {
>>          val = bfd_get_8 (abfd, contents + offset - 3);
>>          if (val != 0x48 && val != 0x4c)
>>            {
>>              /* X32 may have 0x44 REX prefix or no REX prefix.  */
>>              if (ABI_64_P (abfd))
>>                return FALSE;
>>            }
>>        }
>>      else
>>        {
>>          /* X32 may not have any REX prefix.  */
>>          if (ABI_64_P (abfd))
>>            return FALSE;
>>          if (offset < 2 || (offset + 3) > sec->size)
>>            return FALSE;
>>        }
>>
>> So, it should handle the case without REX just OK. If it doesn't, then
>> this is a bug in binutils.
>>
>
> The last byte of the displacement in the previous instruction
> may happen to look like a REX byte. In that case, linker
> will overwrite the last byte of the previous instruction and
> generate the wrong instruction sequence.
>
> I need to update linker to enforce the REX byte check.

One important observation: if we want to follow the x86_64 TLS spec
strictly, we have to use existing DImode patterns only. This also
means that we should NOT convert other TLS patterns to Pmode, since
they explicitly state movq and addq. If this is not the case, then we
need new TLS specification for X32.

Uros.
>
> --
> H.J.
diff mbox

Patch

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 630112f..a9b9d3f 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -277,6 +277,8 @@  extern void x86_output_aligned_bss (FILE *, tree, const char *,
 extern void x86_elf_aligned_common (FILE *, const char *,
 				    unsigned HOST_WIDE_INT, int);
 
+extern bool ix86_output_rex_prefix_p (rtx, rtx);
+
 #ifdef RTX_CODE
 extern void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
 				      enum rtx_code *, enum rtx_code *);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ac9c714..2cbfb64 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14670,6 +14670,29 @@  i386_asm_output_addr_const_extra (FILE *file, rtx x)
 
   return true;
 }
+
+/* Since x64-64 linker IE->LE transition requires a REX prefix, we
+   output a REX prefix if there isn't one.  */
+
+bool
+ix86_output_rex_prefix_p (rtx dest, rtx op)
+{
+  if (!TARGET_X32
+      || GET_MODE (dest) != SImode
+      || REX_INT_REG_P (dest)
+      || !MEM_P (op))
+    return false;
+
+  op = XEXP (op, 0);
+  if (GET_CODE (op) != CONST)
+    return false;
+
+  op = XEXP (op, 0);
+  if (GET_CODE (op) != UNSPEC)
+    return false;
+
+  return XINT (op, 1) == UNSPEC_GOTNTPOFF;
+}
 
 /* Split one or more double-mode RTL references into pairs of half-mode
    references.  The RTL can be REG, offsettable MEM, integer constant, or
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8fc7918..35b2673 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2211,7 +2211,13 @@ 
       if (ix86_use_lea_for_mov (insn, operands))
 	return "lea{l}\t{%a1, %0|%0, %a1}";
       else
-	return "mov{l}\t{%1, %0|%0, %1}";
+	{
+	  /* Output REX prefix if needed.  */
+	  if (ix86_output_rex_prefix_p (operands[0], operands[1]))
+	    return "rex mov{l}\t{%1, %0|%0, %1}";
+	  else
+	    return "mov{l}\t{%1, %0|%0, %1}";
+	}
     }
 }
   [(set (attr "type")
@@ -5540,7 +5546,11 @@ 
       if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
         return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
 
-      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+      /* Output REX prefix if needed.  */
+      if (ix86_output_rex_prefix_p (operands[0], operands[2]))
+	return "rex add{<imodesuffix>}\t{%2, %0|%0, %2}";
+      else
+	return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
   [(set (attr "type")