[AAarch64,v3] Add support for TARGET_COMPUTE_FRAME_LAYOUT

Message ID 0f11a4e3-9c67-e9b6-49dc-4b3573ca8f0f@arm.com
State New
Headers show
Series
  • [AAarch64,v3] Add support for TARGET_COMPUTE_FRAME_LAYOUT
Related show

Commit Message

Vlad Lazar Aug. 6, 2018, 4:14 p.m.
Hi,

The patch adds support for the TARGET_COMPUTE_FRAME_LAYOUT hook on AArch64
and removes unneeded frame layout recalculation.

The removed aarch64_layout_frame calls are unnecessary because the functions in which
they appear will be called during or after the reload pass in which the TARGET_COMPUTE_FRAME_LAYOUT
hook is called. The if statement in aarch64_layout_frame had the purpose of avoiding
the extra work from the calls which have been removed and is now redundant.

Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.

Thanks,
Vlad

gcc/
2018-08-06  Vlad Lazar  <vlad.lazar@arm.com>

	* config/aarch64/aarch64.h (TARGET_COMPUTE_FRAME_LAYOUT): Define.
	* config/aarch64/aarch64.c (aarch64_expand_prologue): Remove aarch64_layout_frame call.
	(aarch64_expand_epilogue): Likewise.
	(aarch64_initial_elimination_offset): Likewise.
	(aarch64_get_separate_components): Likewise.
	(aarch64_use_return_insn_p): Likewise.
	(aarch64_layout_frame): Remove unneeded check.

---

Comments

Vlad Lazar Aug. 28, 2018, 9 a.m. | #1
Gentle ping.

On 06/08/18 17:14, Vlad Lazar wrote:
> Hi,
>
> The patch adds support for the TARGET_COMPUTE_FRAME_LAYOUT hook on AArch64
> and removes unneeded frame layout recalculation.
>
> The removed aarch64_layout_frame calls are unnecessary because the functions in which
> they appear will be called during or after the reload pass in which the TARGET_COMPUTE_FRAME_LAYOUT
> hook is called. The if statement in aarch64_layout_frame had the purpose of avoiding
> the extra work from the calls which have been removed and is now redundant.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu and there are no regressions.
>
> Thanks,
> Vlad
>
> gcc/
> 2018-08-06  Vlad Lazar  <vlad.lazar@arm.com>
>
>      * config/aarch64/aarch64.h (TARGET_COMPUTE_FRAME_LAYOUT): Define.
>      * config/aarch64/aarch64.c (aarch64_expand_prologue): Remove aarch64_layout_frame call.
>      (aarch64_expand_epilogue): Likewise.
>      (aarch64_initial_elimination_offset): Likewise.
>      (aarch64_get_separate_components): Likewise.
>      (aarch64_use_return_insn_p): Likewise.
>      (aarch64_layout_frame): Remove unneeded check.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 976f9afae54c1c98c22a4d5002d8d94e33b3190a..02aaa333fb57eff918049681173403f004a8a8e3 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -478,6 +478,9 @@ extern unsigned aarch64_architecture_version;
>   #undef DONT_USE_BUILTIN_SETJMP
>   #define DONT_USE_BUILTIN_SETJMP 1
>
> +#undef TARGET_COMPUTE_FRAME_LAYOUT
> +#define TARGET_COMPUTE_FRAME_LAYOUT aarch64_layout_frame
> +
>   /* Register in which the structure value is to be returned.  */
>   #define AARCH64_STRUCT_VALUE_REGNUM R8_REGNUM
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index b88e7cac27ab76e01b9769563ec9077d2a81bd7b..6a52eecf94011f5a7ee787c1295ca24732af2ff4 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4021,9 +4021,6 @@ aarch64_layout_frame (void)
>     HOST_WIDE_INT offset = 0;
>     int regno, last_fp_reg = INVALID_REGNUM;
>
> -  if (reload_completed && cfun->machine->frame.laid_out)
> -    return;
> -
>     cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
>
>   #define SLOT_NOT_REQUIRED (-2)
> @@ -4567,8 +4564,6 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
>   static sbitmap
>   aarch64_get_separate_components (void)
>   {
> -  aarch64_layout_frame ();
> -
>     sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
>     bitmap_clear (components);
>
> @@ -4592,7 +4587,7 @@ aarch64_get_separate_components (void)
>
>     unsigned reg1 = cfun->machine->frame.wb_candidate1;
>     unsigned reg2 = cfun->machine->frame.wb_candidate2;
> -  /* If aarch64_layout_frame has chosen registers to store/restore with
> +  /* If registers have been chosen to be stored/restored with
>        writeback don't interfere with them to avoid having to output explicit
>        stack adjustment instructions.  */
>     if (reg2 != INVALID_REGNUM)
> @@ -4850,8 +4845,6 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
>   void
>   aarch64_expand_prologue (void)
>   {
> -  aarch64_layout_frame ();
> -
>     poly_int64 frame_size = cfun->machine->frame.frame_size;
>     poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
>     HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
> @@ -4964,8 +4957,6 @@ aarch64_use_return_insn_p (void)
>     if (crtl->profile)
>       return false;
>
> -  aarch64_layout_frame ();
> -
>     return known_eq (cfun->machine->frame.frame_size, 0);
>   }
>
> @@ -4977,8 +4968,6 @@ aarch64_use_return_insn_p (void)
>   void
>   aarch64_expand_epilogue (bool for_sibcall)
>   {
> -  aarch64_layout_frame ();
> -
>     poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
>     HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
>     poly_int64 final_adjust = cfun->machine->frame.final_adjust;
> @@ -7525,8 +7514,6 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
>   poly_int64
>   aarch64_initial_elimination_offset (unsigned from, unsigned to)
>   {
> -  aarch64_layout_frame ();
> -
>     if (to == HARD_FRAME_POINTER_REGNUM)
>       {
>         if (from == ARG_POINTER_REGNUM)
James Greenhalgh Sept. 11, 2018, 4:53 p.m. | #2
On Mon, Aug 06, 2018 at 11:14:17AM -0500, Vlad Lazar wrote:
> Hi,
> 
> The patch adds support for the TARGET_COMPUTE_FRAME_LAYOUT hook on AArch64
> and removes unneeded frame layout recalculation.
> 
> The removed aarch64_layout_frame calls are unnecessary because the functions in which
> they appear will be called during or after the reload pass in which the TARGET_COMPUTE_FRAME_LAYOUT
> hook is called. The if statement in aarch64_layout_frame had the purpose of avoiding
> the extra work from the calls which have been removed and is now redundant.

I'm not sure I understand, I may be missing something as the frame layout
is complex, but I can't get where I need to be to accept your patch from this
comment.

The check you removed ensures that if we're after reload, and the frame is
laid out, we do no additional work. That part I understand, and that would
mean that any post-reload calls were no-ops. Is the argument that all
users of this code that you eliminate are after reload, and consequently
would have hit this no-op path? Can you talk me through why each case is
safe?

Thanks,
James

> gcc/
> 2018-08-06  Vlad Lazar  <vlad.lazar@arm.com>
> 
> 	* config/aarch64/aarch64.h (TARGET_COMPUTE_FRAME_LAYOUT): Define.
> 	* config/aarch64/aarch64.c (aarch64_expand_prologue): Remove aarch64_layout_frame call.
> 	(aarch64_expand_epilogue): Likewise.
> 	(aarch64_initial_elimination_offset): Likewise.
> 	(aarch64_get_separate_components): Likewise.
> 	(aarch64_use_return_insn_p): Likewise.
> 	(aarch64_layout_frame): Remove unneeded check.
Vlad Lazar Sept. 12, 2018, 1:07 p.m. | #3
On 11/09/18 17:53, James Greenhalgh wrote:
> On Mon, Aug 06, 2018 at 11:14:17AM -0500, Vlad Lazar wrote:
>> Hi,
>>
>> The patch adds support for the TARGET_COMPUTE_FRAME_LAYOUT hook on AArch64
>> and removes unneeded frame layout recalculation.
>>
>> The removed aarch64_layout_frame calls are unnecessary because the functions in which
>> they appear will be called during or after the reload pass in which the TARGET_COMPUTE_FRAME_LAYOUT
>> hook is called. The if statement in aarch64_layout_frame had the purpose of avoiding
>> the extra work from the calls which have been removed and is now redundant.
>
> I'm not sure I understand, I may be missing something as the frame layout
> is complex, but I can't get where I need to be to accept your patch from this
> comment.
>
> The check you removed ensures that if we're after reload, and the frame is
> laid out, we do no additional work. That part I understand, and that would
> mean that any post-reload calls were no-ops. Is the argument that all
> users of this code that you eliminate are after reload, and consequently
> would have hit this no-op path? Can you talk me through why each case is
> safe?
>
Thanks for taking a look at the patch.

Indeed, all the removed calls are happening during or after reload. I'll go trough all of them
and try to explain the rationale behind.

aarch64_expand_prologue and aarch64_expand_epilogue are called after the pro_and_epilogue pass,
which runs after reload where TARGET_COMPUTE_FRAME_LAYOUT is called.

aarch64_use_return_insn_p checks explicitly for reload_completed at the beginning of the function
and returns false if reload has not run. So it's safe to remove the call as the frame layout is
computed by the time it reaches that point.

aarch64_get_separate_components implements the TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS hook.
This hook only seems to be used int shrink_wrap.c:try_shrink_wrapping_separate. The actual origin
of this hook call can be traced back to the pro_and_epilogue pass:
shrink_wrap.c:try_shrink_wrapping_separate <-
function.c:thread_prologue_and_epilogue_insns <-
function.c:rest_of_handle_thread_prologue_and_epilogue (pro_and_epilogue pass entry point).
Therefore, aarch64_get_separate_components only gets called post reload.

aarch64_get_separate_components implements the INITIAL_ELIMINATION_OFFSET hook, which is used in:
	1. rtlanal.c:get_initial_register_offset: Before using the hook it checks that reload has
	been completed.
	2. reload1.c:get_initial_register_offset and reload1.c:set_initial_elim_offsets: These functions
	explicitly call TARGET_COMPUTE_FRAME_LAYOUT before using the hook.
	3. lra-eliminitations.c:update_reg_eliminate: The TARGET_COMPUTE_FRAME_LAYOUT is, again, called
	before the INITIAL_ELIMINATION_OFFSET hook is used.

I hope this helps make things a bit clearer.

Thanks,
Vlad

> Thanks,
> James
>
>> gcc/
>> 2018-08-06  Vlad Lazar  <vlad.lazar@arm.com>
>>
>> 	* config/aarch64/aarch64.h (TARGET_COMPUTE_FRAME_LAYOUT): Define.
>> 	* config/aarch64/aarch64.c (aarch64_expand_prologue): Remove aarch64_layout_frame call.
>> 	(aarch64_expand_epilogue): Likewise.
>> 	(aarch64_initial_elimination_offset): Likewise.
>> 	(aarch64_get_separate_components): Likewise.
>> 	(aarch64_use_return_insn_p): Likewise.
>> 	(aarch64_layout_frame): Remove unneeded check.
James Greenhalgh Sept. 12, 2018, 1:47 p.m. | #4
On Wed, Sep 12, 2018 at 08:07:41AM -0500, Vlad Lazar wrote:
> On 11/09/18 17:53, James Greenhalgh wrote:
> > On Mon, Aug 06, 2018 at 11:14:17AM -0500, Vlad Lazar wrote:
> >> Hi,
> >>
> >> The patch adds support for the TARGET_COMPUTE_FRAME_LAYOUT hook on AArch64
> >> and removes unneeded frame layout recalculation.
> >>
> >> The removed aarch64_layout_frame calls are unnecessary because the functions in which
> >> they appear will be called during or after the reload pass in which the TARGET_COMPUTE_FRAME_LAYOUT
> >> hook is called. The if statement in aarch64_layout_frame had the purpose of avoiding
> >> the extra work from the calls which have been removed and is now redundant.
> >
> > I'm not sure I understand, I may be missing something as the frame layout
> > is complex, but I can't get where I need to be to accept your patch from this
> > comment.
> >
> > The check you removed ensures that if we're after reload, and the frame is
> > laid out, we do no additional work. That part I understand, and that would
> > mean that any post-reload calls were no-ops. Is the argument that all
> > users of this code that you eliminate are after reload, and consequently
> > would have hit this no-op path? Can you talk me through why each case is
> > safe?
> >
> Thanks for taking a look at the patch.
> 
> Indeed, all the removed calls are happening during or after reload. I'll go trough all of them
> and try to explain the rationale behind.
> 
> aarch64_expand_prologue and aarch64_expand_epilogue are called after the pro_and_epilogue pass,
> which runs after reload where TARGET_COMPUTE_FRAME_LAYOUT is called.
> 
> aarch64_use_return_insn_p checks explicitly for reload_completed at the beginning of the function
> and returns false if reload has not run. So it's safe to remove the call as the frame layout is
> computed by the time it reaches that point.
> 
> aarch64_get_separate_components implements the TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS hook.
> This hook only seems to be used int shrink_wrap.c:try_shrink_wrapping_separate. The actual origin
> of this hook call can be traced back to the pro_and_epilogue pass:
> shrink_wrap.c:try_shrink_wrapping_separate <-
> function.c:thread_prologue_and_epilogue_insns <-
> function.c:rest_of_handle_thread_prologue_and_epilogue (pro_and_epilogue pass entry point).
> Therefore, aarch64_get_separate_components only gets called post reload.
> 
> aarch64_get_separate_components implements the INITIAL_ELIMINATION_OFFSET hook, which is used in:
> 	1. rtlanal.c:get_initial_register_offset: Before using the hook it checks that reload has
> 	been completed.
> 	2. reload1.c:get_initial_register_offset and reload1.c:set_initial_elim_offsets: These functions
> 	explicitly call TARGET_COMPUTE_FRAME_LAYOUT before using the hook.
> 	3. lra-eliminitations.c:update_reg_eliminate: The TARGET_COMPUTE_FRAME_LAYOUT is, again, called
> 	before the INITIAL_ELIMINATION_OFFSET hook is used.
> 
> I hope this helps make things a bit clearer.

Thanks for this, it is very helpful. The patch is OK for trunk.

James

> >> gcc/
> >> 2018-08-06  Vlad Lazar  <vlad.lazar@arm.com>
> >>
> >> 	* config/aarch64/aarch64.h (TARGET_COMPUTE_FRAME_LAYOUT): Define.
> >> 	* config/aarch64/aarch64.c (aarch64_expand_prologue): Remove aarch64_layout_frame call.
> >> 	(aarch64_expand_epilogue): Likewise.
> >> 	(aarch64_initial_elimination_offset): Likewise.
> >> 	(aarch64_get_separate_components): Likewise.
> >> 	(aarch64_use_return_insn_p): Likewise.
> >> 	(aarch64_layout_frame): Remove unneeded check.
>

Patch

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 976f9afae54c1c98c22a4d5002d8d94e33b3190a..02aaa333fb57eff918049681173403f004a8a8e3 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -478,6 +478,9 @@  extern unsigned aarch64_architecture_version;
  #undef DONT_USE_BUILTIN_SETJMP
  #define DONT_USE_BUILTIN_SETJMP 1
  
+#undef TARGET_COMPUTE_FRAME_LAYOUT
+#define TARGET_COMPUTE_FRAME_LAYOUT aarch64_layout_frame
+
  /* Register in which the structure value is to be returned.  */
  #define AARCH64_STRUCT_VALUE_REGNUM R8_REGNUM
  
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b88e7cac27ab76e01b9769563ec9077d2a81bd7b..6a52eecf94011f5a7ee787c1295ca24732af2ff4 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4021,9 +4021,6 @@  aarch64_layout_frame (void)
    HOST_WIDE_INT offset = 0;
    int regno, last_fp_reg = INVALID_REGNUM;
  
-  if (reload_completed && cfun->machine->frame.laid_out)
-    return;
-
    cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
  
  #define SLOT_NOT_REQUIRED (-2)
@@ -4567,8 +4564,6 @@  offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
  static sbitmap
  aarch64_get_separate_components (void)
  {
-  aarch64_layout_frame ();
-
    sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
    bitmap_clear (components);
  
@@ -4592,7 +4587,7 @@  aarch64_get_separate_components (void)
  
    unsigned reg1 = cfun->machine->frame.wb_candidate1;
    unsigned reg2 = cfun->machine->frame.wb_candidate2;
-  /* If aarch64_layout_frame has chosen registers to store/restore with
+  /* If registers have been chosen to be stored/restored with
       writeback don't interfere with them to avoid having to output explicit
       stack adjustment instructions.  */
    if (reg2 != INVALID_REGNUM)
@@ -4850,8 +4845,6 @@  aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
  void
  aarch64_expand_prologue (void)
  {
-  aarch64_layout_frame ();
-
    poly_int64 frame_size = cfun->machine->frame.frame_size;
    poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
    HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
@@ -4964,8 +4957,6 @@  aarch64_use_return_insn_p (void)
    if (crtl->profile)
      return false;
  
-  aarch64_layout_frame ();
-
    return known_eq (cfun->machine->frame.frame_size, 0);
  }
  
@@ -4977,8 +4968,6 @@  aarch64_use_return_insn_p (void)
  void
  aarch64_expand_epilogue (bool for_sibcall)
  {
-  aarch64_layout_frame ();
-
    poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
    HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
    poly_int64 final_adjust = cfun->machine->frame.final_adjust;
@@ -7525,8 +7514,6 @@  aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
  poly_int64
  aarch64_initial_elimination_offset (unsigned from, unsigned to)
  {
-  aarch64_layout_frame ();
-
    if (to == HARD_FRAME_POINTER_REGNUM)
      {
        if (from == ARG_POINTER_REGNUM)