diff mbox

[AArch64] Align FP callee-saves

Message ID AM5PR0802MB26107E6FD9F40786D4E3CB2E83D00@AM5PR0802MB2610.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Wilco Dijkstra Oct. 17, 2016, 12:40 p.m. UTC
ping


From: Wilco Dijkstra
Sent: 08 September 2016 14:35
To: GCC Patches
Cc: nd
Subject: [PATCH][AArch64] Align FP callee-saves
    
If the number of integer callee-saves is odd, the FP callee-saves start at an offset
that is only 8-byte aligned, so they are saved and restored with 8-byte aligned LDP/STP.
Since 16-byte alignment may be faster on some CPUs, align the FP callee-saves to 16 bytes
and, when possible, use the resulting alignment gap for the last FP callee-save. Apart from
slightly different offsets for the FP callee-saves, the generated code doesn't change.

Bootstrap and regression tests pass. OK for commit?


ChangeLog:
2016-09-08  Wilco Dijkstra  <wdijkstr@arm.com>

        * config/aarch64/aarch64.c (aarch64_layout_frame):
        Align FP callee-saves.
--

Comments

James Greenhalgh Oct. 18, 2016, 4:28 p.m. UTC | #1
On Mon, Oct 17, 2016 at 12:40:18PM +0000, Wilco Dijkstra wrote:
> 
> ping
>
> If the number of integer callee-saves is odd, the FP callee-saves use 8-byte
> aligned LDP/STP.  Since 16-byte alignment may be faster on some CPUs, align
> the FP callee-saves to 16 bytes and use the alignment gap for the last FP
> callee-save when possible. Besides slightly different offsets for FP
> callee-saves, the generated code doesn't change.
> 
> Bootstrap and regression pass, OK for commit?

This looks OK to me.

Thanks for the patch.

James

> ChangeLog:
> 2016-09-08  Wilco Dijkstra  <wdijkstr@arm.com>
> 
>         * config/aarch64/aarch64.c (aarch64_layout_frame):
>         Align FP callee-saves.
> --
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index fed3b6e803821392194dc34a6c3df5f653d2e33e..075b3802c72a68f63b47574e19186e7ce3440b28 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -2735,7 +2735,7 @@ static void
>  aarch64_layout_frame (void)
>  {
>    HOST_WIDE_INT offset = 0;
> -  int regno;
> +  int regno, last_fp_reg = INVALID_REGNUM;
>  
>    if (reload_completed && cfun->machine->frame.laid_out)
>      return;
> @@ -2781,7 +2781,10 @@ aarch64_layout_frame (void)
>    for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
>      if (df_regs_ever_live_p (regno)
>          && !call_used_regs[regno])
> -      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
> +      {
> +       cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
> +       last_fp_reg = regno;
> +      }
>  
>    if (cfun->machine->frame.emit_frame_chain)
>      {
> @@ -2805,9 +2808,21 @@ aarch64_layout_frame (void)
>          offset += UNITS_PER_WORD;
>        }
>  
> +  HOST_WIDE_INT max_int_offset = offset;
> +  offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
> +  bool has_align_gap = offset != max_int_offset;
> +
>    for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
>      if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
>        {
> +       /* If there is an alignment gap between integer and fp callee-saves,
> +          allocate the last fp register to it if possible.  */
> +       if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
> +         {
> +           cfun->machine->frame.reg_offset[regno] = max_int_offset;
> +           break;
> +         }
> +
>          cfun->machine->frame.reg_offset[regno] = offset;
>          if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
>            cfun->machine->frame.wb_candidate1 = regno;
>
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fed3b6e803821392194dc34a6c3df5f653d2e33e..075b3802c72a68f63b47574e19186e7ce3440b28 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2735,7 +2735,7 @@  static void
 aarch64_layout_frame (void)
 {
   HOST_WIDE_INT offset = 0;
-  int regno;
+  int regno, last_fp_reg = INVALID_REGNUM;
 
   if (reload_completed && cfun->machine->frame.laid_out)
     return;
@@ -2781,7 +2781,10 @@  aarch64_layout_frame (void)
   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
     if (df_regs_ever_live_p (regno)
         && !call_used_regs[regno])
-      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
+      {
+       cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
+       last_fp_reg = regno;
+      }
 
   if (cfun->machine->frame.emit_frame_chain)
     {
@@ -2805,9 +2808,21 @@  aarch64_layout_frame (void)
         offset += UNITS_PER_WORD;
       }
 
+  HOST_WIDE_INT max_int_offset = offset;
+  offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
+  bool has_align_gap = offset != max_int_offset;
+
   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
     if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
       {
+       /* If there is an alignment gap between integer and fp callee-saves,
+          allocate the last fp register to it if possible.  */
+       if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
+         {
+           cfun->machine->frame.reg_offset[regno] = max_int_offset;
+           break;
+         }
+
         cfun->machine->frame.reg_offset[regno] = offset;
         if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
           cfun->machine->frame.wb_candidate1 = regno;