@@ -1853,13 +1853,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
+ (TCG_TARGET_STACK_ALIGN - 1);
frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
- /* Push (FP, LR) and allocate space for all saved registers. */
- tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
+ /* Push (x19, x20) and allocate space for all saved registers. */
+ tcg_out_insn(s, 3314, STP, TCG_REG_X19, TCG_REG_X20,
TCG_REG_SP, -frame_size_callee_saved, 1, 1);
- /* Store callee-preserved regs x19..x28. */
- for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
- int ofs = (r - TCG_REG_X19 + 2) * 8;
+ /* Store callee-preserved regs x21..x30. */
+ for (r = TCG_REG_X21; r < TCG_REG_X30; r += 2) {
+ int ofs = (r - TCG_REG_X19) * 8;
tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
}
@@ -1887,14 +1887,15 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
frame_size_tcg_locals);
- /* Restore registers x19..x28. */
- for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
- int ofs = (r - TCG_REG_X19 + 2) * 8;
+ /* Restore registers x30..x21 in reverse order. This gets LR loaded
+ in the first LDP, minimizing the latency to the return insn. */
+ for (r = TCG_REG_X29; r >= TCG_REG_X21; r -= 2) {
+ int ofs = (r - TCG_REG_X19) * 8;
tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
}
- /* Pop (FP, LR), restore SP to previous frame. */
- tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
+ /* Pop (x19, x20), restoring SP to previous frame. */
+ tcg_out_insn(s, 3314, LDP, TCG_REG_X19, TCG_REG_X20,
TCG_REG_SP, frame_size_callee_saved, 0, 1);
tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}
Load LR earlier so that it is available to the return branch sooner.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/aarch64/tcg-target.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)