diff mbox series

xtensa: Optimize stack pointer updates in function pro/epilogue under certain conditions

Message ID 70d62887-e221-786b-cf16-612be9f7c179@yahoo.co.jp
State New
Headers show
Series xtensa: Optimize stack pointer updates in function pro/epilogue under certain conditions | expand

Commit Message

Takayuki 'January June' Suwa Aug. 17, 2022, 7:31 p.m. UTC
This patch enforces the use of the "addmi" machine instruction, instead of
addition/subtraction with two source registers, for adjusting the stack
pointer when the adjustment fits into a signed 16-bit value and is also a
multiple of 256.

    /* example */
    void test(void) {
      char buffer[4096];
      __asm__(""::"m"(buffer));
    }

    ;; before
    test:
	movi.n	a9, 1
	slli	a9, a9, 12
	sub	sp, sp, a9
	movi.n	a9, 1
	slli	a9, a9, 12
	add.n	sp, sp, a9
	addi	sp, sp, 0
	ret.n

    ;; after
    test:
	addmi	sp, sp, -0x1000
	addmi	sp, sp, 0x1000
	ret.n

gcc/ChangeLog:

	* config/xtensa/xtensa.cc (xtensa_expand_prologue):
	Use an "addmi" machine instruction for updating the stack pointer
	rather than addition/subtraction via hard register A9, if the amount
	of change satisfies the literal value conditions of that instruction
	when the CALL0 ABI is used.
	(xtensa_expand_epilogue): Ditto.
	Also avoid emitting a stack pointer addition of constant zero.
---
 gcc/config/xtensa/xtensa.cc | 79 +++++++++++++++++++++++++------------
 1 file changed, 54 insertions(+), 25 deletions(-)

Comments

Max Filippov Aug. 18, 2022, 12:49 a.m. UTC | #1
On Wed, Aug 17, 2022 at 12:32 PM Takayuki 'January June' Suwa
<jjsuwa_sys3175@yahoo.co.jp> wrote:
>
> This patch enforces the use of "addmi" machine instruction instead of
> addition/subtraction with two source registers for adjusting the stack
> pointer, if the adjustment fits into a signed 16-bit and is also a multiple
> of 256.
>
>     /* example */
>     void test(void) {
>       char buffer[4096];
>       __asm__(""::"m"(buffer));
>     }
>
>     ;; before
>     test:
>         movi.n  a9, 1
>         slli    a9, a9, 12
>         sub     sp, sp, a9
>         movi.n  a9, 1
>         slli    a9, a9, 12
>         add.n   sp, sp, a9
>         addi    sp, sp, 0
>         ret.n
>
>     ;; after
>     test:
>         addmi   sp, sp, -0x1000
>         addmi   sp, sp, 0x1000
>         ret.n
>
> gcc/ChangeLog:
>
>         * config/xtensa/xtensa.cc (xtensa_expand_prologue):
>         Use an "addmi" machine instruction for updating the stack pointer
>         rather than addition/subtraction via hard register A9, if the amount
>         of change satisfies the literal value conditions of that instruction
>         when the CALL0 ABI is used.
>         (xtensa_expand_epilogue): Ditto.
>         And also inhibit the stack pointer addition of constant zero.
> ---
>  gcc/config/xtensa/xtensa.cc | 79 +++++++++++++++++++++++++------------
>  1 file changed, 54 insertions(+), 25 deletions(-)

Regtested for target=xtensa-linux-uclibc, no new regressions.
Committed to master.
diff mbox series

Patch

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 6ac879c38fb..b673b6764da 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -3150,7 +3150,6 @@  xtensa_expand_prologue (void)
   rtx_insn *insn = NULL;
   rtx note_rtx;
 
-
   total_size = compute_frame_size (get_frame_size ());
 
   if (flag_stack_usage_info)
@@ -3206,10 +3205,17 @@  xtensa_expand_prologue (void)
 	    }
 	  else
 	    {
-	      rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-	      emit_move_insn (tmp_reg, GEN_INT (total_size));
-	      insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
-					    stack_pointer_rtx, tmp_reg));
+	      if (xtensa_simm8x256 (-total_size))
+		insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
+					      stack_pointer_rtx,
+					      GEN_INT (-total_size)));
+	      else
+		{
+		  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+		  emit_move_insn (tmp_reg, GEN_INT (total_size));
+		  insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+						stack_pointer_rtx, tmp_reg));
+		}
 	      RTX_FRAME_RELATED_P (insn) = 1;
 	      note_rtx = gen_rtx_SET (stack_pointer_rtx,
 				      plus_constant (Pmode, stack_pointer_rtx,
@@ -3237,11 +3243,19 @@  xtensa_expand_prologue (void)
       if (total_size > 1024
 	  || (!callee_save_size && total_size > 128))
 	{
-	  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-	  emit_move_insn (tmp_reg, GEN_INT (total_size -
-					    callee_save_size));
-	  insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
-					stack_pointer_rtx, tmp_reg));
+	  if (xtensa_simm8x256 (callee_save_size - total_size))
+	    insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
+					  stack_pointer_rtx,
+					  GEN_INT (callee_save_size -
+						   total_size)));
+	  else
+	    {
+	      rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+	      emit_move_insn (tmp_reg, GEN_INT (total_size -
+						callee_save_size));
+	      insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+					    stack_pointer_rtx, tmp_reg));
+	    }
 	  RTX_FRAME_RELATED_P (insn) = 1;
 	  note_rtx = gen_rtx_SET (stack_pointer_rtx,
 				  plus_constant (Pmode, stack_pointer_rtx,
@@ -3315,12 +3329,21 @@  xtensa_expand_epilogue (bool sibcall_p)
 
       if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024))
 	{
-	  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-	  emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size -
-					    cfun->machine->callee_save_size));
-	  emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
-				 hard_frame_pointer_rtx : stack_pointer_rtx,
-				 tmp_reg));
+	  if (xtensa_simm8x256 (cfun->machine->current_frame_size -
+				cfun->machine->callee_save_size))
+	    emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
+				   hard_frame_pointer_rtx : stack_pointer_rtx,
+				   GEN_INT (cfun->machine->current_frame_size -
+					    cfun->machine->callee_save_size)));
+	  else
+	    {
+	      rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+	      emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size -
+						cfun->machine->callee_save_size));
+	      emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
+				     hard_frame_pointer_rtx : stack_pointer_rtx,
+				     tmp_reg));
+	    }
 	  offset = cfun->machine->callee_save_size - UNITS_PER_WORD;
 	}
       else
@@ -3360,18 +3383,24 @@  xtensa_expand_epilogue (bool sibcall_p)
 		offset = cfun->machine->current_frame_size;
 	      else
 		offset = cfun->machine->callee_save_size;
-
-	      emit_insn (gen_addsi3 (stack_pointer_rtx,
-				     stack_pointer_rtx,
-				     GEN_INT (offset)));
+	      if (offset)
+		emit_insn (gen_addsi3 (stack_pointer_rtx,
+				       stack_pointer_rtx,
+				       GEN_INT (offset)));
 	    }
 	  else
 	    {
-	      rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
-	      emit_move_insn (tmp_reg,
-			      GEN_INT (cfun->machine->current_frame_size));
-	      emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-				     tmp_reg));
+	      if (xtensa_simm8x256 (cfun->machine->current_frame_size))
+		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+				       GEN_INT (cfun->machine->current_frame_size)));
+	      else
+		{
+		  rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
+		  emit_move_insn (tmp_reg,
+				  GEN_INT (cfun->machine->current_frame_size));
+		  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+					 tmp_reg));
+		}
 	    }
 	}