diff mbox

[03/12,i386] Use re-aligned stack pointer for aligned SSE movs

Message ID 20170427080932.11703-3-daniel.santos@pobox.com
State New
Headers show

Commit Message

Daniel Santos April 27, 2017, 8:09 a.m. UTC
Add an optional `align' parameter to choose_baseaddr, allowing the
caller to request an address that is aligned to some boundary.  Modify
ix86_emit_save_regs_using_mov and ix86_emit_restore_regs_using_mov use
optimally aligned memory when such a base register is available.

Signed-off-by: Daniel Santos <daniel.santos@pobox.com>
---
 gcc/config/i386/i386.c  | 111 ++++++++++++++++++++++++++++++++++++++----------
 gcc/config/i386/winnt.c |   3 +-
 2 files changed, 90 insertions(+), 24 deletions(-)
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7923486157d..e8a4ba6fe8d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -12801,15 +12801,39 @@  static inline bool fp_valid_at (HOST_WIDE_INT cfa_offset)
 			  && cfa_offset >= fs.sp_realigned_offset);
 }
 
-/* Return an RTX that points to CFA_OFFSET within the stack frame.
-   The valid base registers are taken from CFUN->MACHINE->FS.  */
+/* Choose a base register based upon alignment requested, speed and/or
+   size.  */
 
-static rtx
-choose_baseaddr (HOST_WIDE_INT cfa_offset)
+static void choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
+			    HOST_WIDE_INT &base_offset,
+			    unsigned int align_reqested, unsigned int *align)
 {
   const struct machine_function *m = cfun->machine;
-  rtx base_reg = NULL;
-  HOST_WIDE_INT base_offset = 0;
+  unsigned int hfp_align;
+  unsigned int drap_align;
+  unsigned int sp_align;
+  bool hfp_ok  = fp_valid_at (cfa_offset);
+  bool drap_ok = m->fs.drap_valid;
+  bool sp_ok   = sp_valid_at (cfa_offset);
+
+  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
+
+  /* Filter out any registers that don't meet the requested alignment
+     criteria.  */
+  if (align_reqested)
+    {
+      if (m->fs.realigned)
+	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
+      /* SEH unwind code does do not currently support REG_CFA_EXPRESSION
+	 notes (which we would need to use a realigned stack pointer),
+	 so disable on SEH targets.  */
+      else if (m->fs.sp_realigned)
+	sp_align = crtl->stack_alignment_needed;
+
+      hfp_ok = hfp_ok && hfp_align >= align_reqested;
+      drap_ok = drap_ok && drap_align >= align_reqested;
+      sp_ok = sp_ok && sp_align >= align_reqested;
+    }
 
   if (m->use_fast_prologue_epilogue)
     {
@@ -12818,17 +12842,17 @@  choose_baseaddr (HOST_WIDE_INT cfa_offset)
          while DRAP must be reloaded within the epilogue.  But choose either
          over the SP due to increased encoding size.  */
 
-      if (m->fs.fp_valid)
+      if (hfp_ok)
 	{
 	  base_reg = hard_frame_pointer_rtx;
 	  base_offset = m->fs.fp_offset - cfa_offset;
 	}
-      else if (m->fs.drap_valid)
+      else if (drap_ok)
 	{
 	  base_reg = crtl->drap_reg;
 	  base_offset = 0 - cfa_offset;
 	}
-      else if (m->fs.sp_valid)
+      else if (sp_ok)
 	{
 	  base_reg = stack_pointer_rtx;
 	  base_offset = m->fs.sp_offset - cfa_offset;
@@ -12841,13 +12865,13 @@  choose_baseaddr (HOST_WIDE_INT cfa_offset)
 
       /* Choose the base register with the smallest address encoding.
          With a tie, choose FP > DRAP > SP.  */
-      if (m->fs.sp_valid)
+      if (sp_ok)
 	{
 	  base_reg = stack_pointer_rtx;
 	  base_offset = m->fs.sp_offset - cfa_offset;
           len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
 	}
-      if (m->fs.drap_valid)
+      if (drap_ok)
 	{
 	  toffset = 0 - cfa_offset;
 	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
@@ -12858,7 +12882,7 @@  choose_baseaddr (HOST_WIDE_INT cfa_offset)
 	      len = tlen;
 	    }
 	}
-      if (m->fs.fp_valid)
+      if (hfp_ok)
 	{
 	  toffset = m->fs.fp_offset - cfa_offset;
 	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
@@ -12870,8 +12894,40 @@  choose_baseaddr (HOST_WIDE_INT cfa_offset)
 	    }
 	}
     }
-  gcc_assert (base_reg != NULL);
 
+    /* Set the align return value.  */
+    if (align)
+      {
+	if (base_reg == stack_pointer_rtx)
+	  *align = sp_align;
+	else if (base_reg == crtl->drap_reg)
+	  *align = drap_align;
+	else if (base_reg == hard_frame_pointer_rtx)
+	  *align = hfp_align;
+      }
+}
+
+/* Return an RTX that points to CFA_OFFSET within the stack frame and
+   the alignment of address.  If align is non-null, it should point to
+   an alignment value (in bits) that is preferred or zero and will
+   recieve the alignment of the base register that was selected.  The
+   valid base registers are taken from CFUN->MACHINE->FS.  */
+
+static rtx
+choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align)
+{
+  rtx base_reg = NULL;
+  HOST_WIDE_INT base_offset = 0;
+
+  /* If a specific alignment is requested, try to get a base register
+     with that alignment first.  */
+  if (align && *align)
+    choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
+
+  if (!base_reg)
+    choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
+
+  gcc_assert (base_reg != NULL);
   return plus_constant (Pmode, base_reg, base_offset);
 }
 
@@ -12900,13 +12956,14 @@  ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
   struct machine_function *m = cfun->machine;
   rtx reg = gen_rtx_REG (mode, regno);
   rtx mem, addr, base, insn;
-  unsigned int align;
+  unsigned int align = GET_MODE_ALIGNMENT (mode);
 
-  addr = choose_baseaddr (cfa_offset);
+  addr = choose_baseaddr (cfa_offset, &align);
   mem = gen_frame_mem (mode, addr);
 
-  /* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
-  align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
+  /* The location aligment depends upon the base register.  */
+  align = MIN (GET_MODE_ALIGNMENT (mode), align);
+  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
   set_mem_align (mem, align);
 
   insn = emit_insn (gen_rtx_SET (mem, reg));
@@ -12946,6 +13003,13 @@  ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
 	}
     }
 
+  else if (base == stack_pointer_rtx && m->fs.sp_realigned
+	   && cfa_offset >= m->fs.sp_realigned_offset)
+    {
+      gcc_checking_assert (stack_realign_fp);
+      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
+    }
+
   /* The memory may not be relative to the current CFA register,
      which means that we may need to generate a new pattern for
      use by the unwind info.  */
@@ -14350,7 +14414,7 @@  ix86_expand_prologue (void)
       /* vDRAP is setup but after reload it turns out stack realign
          isn't necessary, here we will emit prologue to setup DRAP
          without stack realign adjustment */
-      t = choose_baseaddr (0);
+      t = choose_baseaddr (0, NULL);
       emit_insn (gen_rtx_SET (crtl->drap_reg, t));
     }
 
@@ -14487,7 +14551,7 @@  ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
 	rtx mem;
 	rtx_insn *insn;
 
-	mem = choose_baseaddr (cfa_offset);
+	mem = choose_baseaddr (cfa_offset, NULL);
 	mem = gen_frame_mem (word_mode, mem);
 	insn = emit_move_insn (reg, mem);
 
@@ -14524,13 +14588,14 @@  ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
       {
 	rtx reg = gen_rtx_REG (V4SFmode, regno);
 	rtx mem;
-	unsigned int align;
+	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
 
-	mem = choose_baseaddr (cfa_offset);
+	mem = choose_baseaddr (cfa_offset, &align);
 	mem = gen_rtx_MEM (V4SFmode, mem);
 
-	/* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
-	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
+	/* The location aligment depends upon the base register.  */
+	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
+	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
 	set_mem_align (mem, align);
 	emit_insn (gen_rtx_SET (reg, mem));
 
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index f89e7d00fe2..8272c7fddc1 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -1128,7 +1128,8 @@  i386_pe_seh_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
 
 	case REG_CFA_DEF_CFA:
 	case REG_CFA_EXPRESSION:
-	  /* Only emitted with DRAP, which we disable.  */
+	  /* Only emitted with DRAP and aligned memory access using a
+	     realigned SP, both of which we disable.  */
 	  gcc_unreachable ();
 	  break;