diff mbox

[v3,i386] : Enable -mstackrealign and 'force_align_arg_pointer' attribute for x86_64

Message ID CAFULd4bL_jt-Z0MmeF0uU1X88ZkN4YDuzXNnRNWdDZ8SAun2-Q@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak Oct. 6, 2015, 6:50 p.m. UTC
On Mon, Oct 5, 2015 at 7:33 PM, Uros Bizjak <ubizjak@gmail.com> wrote:

>> As shown in PR 66697 [1] and WineHQ bug [2], an application can
>> misalign incoming stack to less than ABI mandated 16 bytes. While it
>> is possible to use -mincoming-stack-boundary=2  (= 4 bytes) for 32 bit
>> targets to emit stack realignment code, this option is artificially
>> limited to 4 (= 16  bytes) for 64bit targets.
>
> Attached v2 patch goes all the way to enable -mstackrealign and
> 'force_align_arg_pointer' attribute for x86_64. In addition to
> -mincoming-stack-boundary changes, the patch changes
> MIN_STACK_BOUNDARY definition to 8bytes on 64bit targets, as this is
> really the minimum supported stack boundary.
>
> This patch is also needed to allow stack realignment in the interrupt handler.

V3 adds support for unaligned SSE moves outside stack realignment area.

2015-10-06  Uros Bizjak  <ubizjak@gmail.com>

    PR target/66697
    * config/i386/i386.c (ix86_option_override_internal): Always use
    8-byte minimum stack boundary in 64-bit mode.
    (ix86_compute_frame_layout): Remove assert on INCOMING_STACK_BOUNDARY.
    (ix86_emit_save_reg_using_mov): Support unaligned SSE store.
    Add a REG_CFA_EXPRESSION note if needed.
    (ix86_emit_restore_sse_regs_using_mov): Support unaligned SSE load.
    (ix86_handle_force_align_arg_pointer_attribute): New.
    (ix86_minimum_incoming_stack_boundary): Remove TARGET_64BIT check.
    (ix86_attribute_table): Set ix86_force_align_arg_pointer_string
    with ix86_handle_force_align_arg_pointer_attribute.
    * config/i386/i386.h (MIN_STACK_BOUNDARY): Set to BITS_PER_WORD.

testsuite/ChangeLog:

2015-10-06  Uros Bizjak  <ubizjak@gmail.com>

    PR target/66697
    * gcc.target/i386/20060512-1.c: Remove ia32 requirement.
    (PUSH, POP): New defines.
    (sse2_test): Use PUSH and POP to misalign runtime stack.
    * gcc.target/i386/20060512-2.c: Remove ia32 requirement.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

An earlier version of this patch was also used to compile 64bit Wine,
where it reportedly fixes all unaligned stack failures for x86_64
target. Please see WineHQ bugreport.

I'll wait for a reconfirmation that this slightly changed patch also
works OK for Wine people, before it is committed to mainline and later
backported to gcc-5 branch.

Uros.
diff mbox

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 228530)
+++ config/i386/i386.c	(working copy)
@@ -5209,8 +5209,7 @@  ix86_option_override_internal (bool main_args_p,
   ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
   if (opts_set->x_ix86_incoming_stack_boundary_arg)
     {
-      int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
-		 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
+      int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
 
       if (opts->x_ix86_incoming_stack_boundary_arg < min
 	  || opts->x_ix86_incoming_stack_boundary_arg > 12)
@@ -11391,7 +11390,6 @@  ix86_compute_frame_layout (struct ix86_frame *fram
       /* The only ABI that has saved SSE registers (Win64) also has a
          16-byte aligned default stack, and thus we don't need to be
 	 within the re-aligned local stack frame to save them.  */
-      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
       offset = ROUND_UP (offset, 16);
       offset += frame->nsseregs * 16;
     }
@@ -11616,14 +11614,26 @@  ix86_emit_save_reg_using_mov (machine_mode mode, u
   struct machine_function *m = cfun->machine;
   rtx reg = gen_rtx_REG (mode, regno);
   rtx mem, addr, base, insn;
+  unsigned int align;
 
   addr = choose_baseaddr (cfa_offset);
   mem = gen_frame_mem (mode, addr);
 
-  /* For SSE saves, we need to indicate the 128-bit alignment.  */
-  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
+  /* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
+  align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
+  set_mem_align (mem, align);
 
-  insn = emit_move_insn (mem, reg);
+  /* SSE saves are not within re-aligned local stack frame.
+     In case INCOMING_STACK_BOUNDARY is misaligned, we have
+     to emit unaligned store.  */
+  if (mode == V4SFmode && align < 128)
+    {
+      rtx unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
+      insn = emit_insn (gen_rtx_SET (mem, unspec));
+    }
+  else
+    insn = emit_insn (gen_rtx_SET (mem, reg));
+
   RTX_FRAME_RELATED_P (insn) = 1;
 
   base = addr;
@@ -11670,6 +11680,8 @@  ix86_emit_save_reg_using_mov (machine_mode mode, u
       mem = gen_rtx_MEM (mode, addr);
       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
     }
+  else
+    add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
 }
 
 /* Emit code to save registers using MOV insns.
@@ -11886,6 +11898,25 @@  find_drap_reg (void)
     }
 }
 
+/* Handle a "force_align_arg_pointer" attribute.  */
+
+static tree
+ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
+					       tree, int, bool *no_add_attrs)
+{
+  if (TREE_CODE (*node) != FUNCTION_TYPE
+      && TREE_CODE (*node) != METHOD_TYPE
+      && TREE_CODE (*node) != FIELD_DECL
+      && TREE_CODE (*node) != TYPE_DECL)
+    {
+      warning (OPT_Wattributes, "%qE attribute only applies to functions",
+	       name);
+      *no_add_attrs = true;
+    }
+
+  return NULL_TREE;
+}
+
 /* Return minimum incoming stack alignment.  */
 
 static unsigned int
@@ -11900,7 +11931,6 @@  ix86_minimum_incoming_stack_boundary (bool sibcall
      if -mstackrealign is used, it isn't used for sibcall check and
      estimated stack alignment is 128bit.  */
   else if (!sibcall
-	   && !TARGET_64BIT
 	   && ix86_force_align_arg_pointer
 	   && crtl->stack_alignment_estimated == 128)
     incoming_stack_boundary = MIN_STACK_BOUNDARY;
@@ -13184,12 +13214,27 @@  ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_IN
       {
 	rtx reg = gen_rtx_REG (V4SFmode, regno);
 	rtx mem;
+	unsigned int align;
 
 	mem = choose_baseaddr (cfa_offset);
 	mem = gen_rtx_MEM (V4SFmode, mem);
-	set_mem_align (mem, 128);
-	emit_move_insn (reg, mem);
 
+	/* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
+	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
+	set_mem_align (mem, align);
+
+	/* SSE saves are not within re-aligned local stack frame.
+	   In case INCOMING_STACK_BOUNDARY is misaligned, we have
+	   to emit unaligned load.  */
+	if (align < 128)
+	  {
+	    rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
+					 UNSPEC_LOADU);
+	    emit_insn (gen_rtx_SET (reg, unspec));
+	  }
+	else
+	  emit_insn (gen_rtx_SET (reg, mem));
+
 	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
 
 	cfa_offset -= GET_MODE_SIZE (V4SFmode);
@@ -48159,7 +48204,7 @@  static const struct attribute_spec ix86_attribute_
     true },
   /* force_align_arg_pointer says this function realigns the stack at entry.  */
   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
-    false, true,  true, ix86_handle_cconv_attribute, false },
+    false, true,  true, ix86_handle_force_align_arg_pointer_attribute, false },
 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
   { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
   { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 228530)
+++ config/i386/i386.h	(working copy)
@@ -755,7 +755,7 @@  extern const char *host_detect_local_cpu (int argc
 #define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
 
 /* Minimum stack boundary.  */
-#define MIN_STACK_BOUNDARY (TARGET_64BIT ? (TARGET_SSE ? 128 : 64) : 32)
+#define MIN_STACK_BOUNDARY BITS_PER_WORD
 
 /* Boundary (in *bits*) on which the stack pointer prefers to be
    aligned; the compiler cannot rely on having this alignment.  */
Index: testsuite/gcc.target/i386/20060512-1.c
===================================================================
--- testsuite/gcc.target/i386/20060512-1.c	(revision 228530)
+++ testsuite/gcc.target/i386/20060512-1.c	(working copy)
@@ -1,5 +1,4 @@ 
 /* { dg-do run } */
-/* { dg-require-effective-target ia32 } */
 /* { dg-options "-std=gnu99 -msse2 -mpreferred-stack-boundary=4" } */
 /* { dg-require-effective-target sse2 } */
 
@@ -7,6 +6,14 @@ 
 
 #include <emmintrin.h>
 
+#ifdef __x86_64__
+# define PUSH "pushq %rsi"
+# define POP "popq %rsi"
+#else
+# define PUSH "pushl %esi"
+# define POP "popl %esi"
+#endif
+
 __m128i __attribute__ ((__noinline__))
 vector_using_function ()
 {
@@ -27,9 +34,9 @@  static void
 sse2_test (void)
 {
   int result;
-  asm ("pushl %esi");		/* Disalign runtime stack.  */
+  asm (PUSH);                  /* Misalign runtime stack.  */
   result = self_aligning_function (g_1, g_2);
   if (result != 42)
     abort ();
-  asm ("popl %esi");
+  asm (POP);
 }
Index: testsuite/gcc.target/i386/20060512-2.c
===================================================================
--- testsuite/gcc.target/i386/20060512-2.c	(revision 228530)
+++ testsuite/gcc.target/i386/20060512-2.c	(working copy)
@@ -1,5 +1,4 @@ 
 /* { dg-do compile } */
-/* { dg-require-effective-target ia32 } */
 /* { dg-options "-std=gnu99 -mpreferred-stack-boundary=4" } */
 int
 outer_function (int x, int y)