Patchwork [v3,16/20] tcg-arm: Fix local stack frame

login
register
mail settings
Submitter Richard Henderson
Date March 28, 2013, 3:32 p.m.
Message ID <1364484781-15561-17-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/232083/
State New
Headers show

Comments

Richard Henderson - March 28, 2013, 3:32 p.m.
We were not allocating TCG_STATIC_CALL_ARGS_SIZE, so this meant that
any helper with more than 4 arguments would clobber the saved regs.
Realizing that we're supposed to have this memory pre-allocated means
we can clean up the tcg_out_arg functions, which were trying to do
more stack allocation.

Allocate stack memory for the TCG temporaries while we're at it.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.c | 120 ++++++++++++++++++---------------------------------
 1 file changed, 43 insertions(+), 77 deletions(-)
Aurelien Jarno - March 29, 2013, 4:50 p.m.
On Thu, Mar 28, 2013 at 08:32:57AM -0700, Richard Henderson wrote:
> We were not allocating TCG_STATIC_CALL_ARGS_SIZE, so this meant that
> any helper with more than 4 arguments would clobber the saved regs.
> Realizing that we're supposed to have this memory pre-allocated means
> we can clean up the tcg_out_arg functions, which were trying to do
> more stack allocation.
> 
> Allocate stack memory for the TCG temporaries while we're at it.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/arm/tcg-target.c | 120 ++++++++++++++++++---------------------------------
>  1 file changed, 43 insertions(+), 77 deletions(-)

This fixes a crash booting a mips64 guest, but unfortunately there are
still more issues to fix.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>

> diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
> index fb8c96d..70f63cb 100644
> --- a/tcg/arm/tcg-target.c
> +++ b/tcg/arm/tcg-target.c
> @@ -1074,65 +1074,35 @@ static const void * const qemu_st_helpers[4] = {
>   * argreg is where we want to put this argument, arg is the argument itself.
>   * Return value is the updated argreg ready for the next call.
>   * Note that argreg 0..3 is real registers, 4+ on stack.
> - * When we reach the first stacked argument, we allocate space for it
> - * and the following stacked arguments using "str r8, [sp, #-0x10]!".
> - * Following arguments are filled in with "str r8, [sp, #0xNN]".
> - * For more than 4 stacked arguments we'd need to know how much
> - * space to allocate when we pushed the first stacked argument.
> - * We don't need this, so don't implement it (and will assert if you try it.)
>   *
>   * We provide routines for arguments which are: immediate, 32 bit
>   * value in register, 16 and 8 bit values in register (which must be zero
>   * extended before use) and 64 bit value in a lo:hi register pair.
>   */
> -#define DEFINE_TCG_OUT_ARG(NAME, ARGPARAM)                                 \
> -    static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGPARAM)             \
> -    {                                                                      \
> -        if (argreg < 4) {                                                  \
> -            TCG_OUT_ARG_GET_ARG(argreg);                                   \
> -        } else if (argreg == 4) {                                          \
> -            TCG_OUT_ARG_GET_ARG(TCG_REG_TMP);                              \
> -            tcg_out32(s, (COND_AL << 28) | 0x052d0010 | (TCG_REG_TMP << 12)); \
> -        } else {                                                           \
> -            assert(argreg < 8);                                            \
> -            TCG_OUT_ARG_GET_ARG(TCG_REG_TMP);                              \
> -            tcg_out_st32_12(s, COND_AL, TCG_REG_TMP, TCG_REG_CALL_STACK,   \
> -                            (argreg - 4) * 4);                             \
> -        }                                                                  \
> -        return argreg + 1;                                                 \
> -    }
> -
> -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_dat_imm(s, COND_AL, ARITH_MOV, A, 0, arg)
> -DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t arg)
> -#undef TCG_OUT_ARG_GET_ARG
> -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext8u(s, COND_AL, A, arg)
> -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg arg)
> -#undef TCG_OUT_ARG_GET_ARG
> -#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext16u(s, COND_AL, A, arg)
> -DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg arg)
> -#undef TCG_OUT_ARG_GET_ARG
> -
> -/* We don't use the macro for this one to avoid an unnecessary reg-reg
> - * move when storing to the stack.
> - */
> -static TCGReg tcg_out_arg_reg32(TCGContext *s, TCGReg argreg, TCGReg arg)
> -{
> -    if (argreg < 4) {
> -        tcg_out_mov_reg(s, COND_AL, argreg, arg);
> -    } else if (argreg == 4) {
> -        /* str arg, [sp, #-0x10]! */
> -        tcg_out32(s, (COND_AL << 28) | 0x052d0010 | (arg << 12));
> -    } else {
> -        assert(argreg < 8);
> -        /* str arg, [sp, #0xNN] */
> -        tcg_out32(s, (COND_AL << 28) | 0x058d0000 |
> -                  (arg << 12) | (argreg - 4) * 4);
> -    }
> -    return argreg + 1;
> -}
> -
> -static inline TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
> -                                       TCGReg arglo, TCGReg arghi)
> +#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
> +static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
> +{                                                                          \
> +    if (argreg < 4) {                                                      \
> +        MOV_ARG(s, COND_AL, argreg, arg);                                  \
> +    } else {                                                               \
> +        int ofs = (argreg - 4) * 4;                                        \
> +        EXT_ARG;                                                           \
> +        assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);                      \
> +        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
> +    }                                                                      \
> +    return argreg + 1;                                                     \
> +}
> +
> +DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
> +    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
> +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
> +    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
> +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
> +    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
> +DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
> +
> +static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
> +                                TCGReg arglo, TCGReg arghi)
>  {
>      /* 64 bit arguments must go in even/odd register pairs
>       * and in 8-aligned stack slots.
> @@ -1144,16 +1114,7 @@ static inline TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
>      argreg = tcg_out_arg_reg32(s, argreg, arghi);
>      return argreg;
>  }
> -
> -static inline void tcg_out_arg_stacktidy(TCGContext *s, TCGReg argreg)
> -{
> -    /* Output any necessary post-call cleanup of the stack */
> -    if (argreg > 4) {
> -        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
> -    }
> -}
> -
> -#endif
> +#endif /* SOFTMMU */
>  
>  #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
>  
> @@ -1284,7 +1245,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
>  #endif
>      argreg = tcg_out_arg_imm32(s, argreg, mem_index);
>      tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
> -    tcg_out_arg_stacktidy(s, argreg);
>  
>      switch (opc) {
>      case 0 | 4:
> @@ -1502,7 +1462,6 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>  
>      argreg = tcg_out_arg_imm32(s, argreg, mem_index);
>      tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
> -    tcg_out_arg_stacktidy(s, argreg);
>  
>      reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
>  #else /* !CONFIG_SOFTMMU */
> @@ -1560,6 +1519,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
>  }
>  
>  static uint32_t tb_pop_ret;
> +static uint32_t tb_frame_size;
>  
>  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>                  const TCGArg *args, const int *const_args)
> @@ -1570,14 +1530,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      switch (opc) {
>      case INDEX_op_exit_tb:
>          a0 = args[0];
> +        tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
> +                       TCG_REG_CALL_STACK, tb_frame_size, 1);
>          if (use_armv7_instructions || check_fit_imm(a0)) {
> -            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
> +            tcg_out_movi32(s, COND_AL, TCG_REG_R0, a0);
>              tcg_out32(s, tb_pop_ret);
>          } else {
>              /* pc is always current address + 8, so 0 reads the word.  */
>              tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
>              tcg_out32(s, tb_pop_ret);
> -            tcg_out32(s, args[0]);
> +            tcg_out32(s, a0);
>          }
>          break;
>      case INDEX_op_goto_tb:
> @@ -2002,8 +1964,6 @@ static void tcg_target_init(TCGContext *s)
>      tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
>  
>      tcg_add_target_add_op_defs(arm_op_defs);
> -    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
> -                  CPU_TEMP_BUF_NLONGS * sizeof(long));
>  }
>  
>  static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
> @@ -2032,17 +1992,23 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
>  
>  static void tcg_target_qemu_prologue(TCGContext *s)
>  {
> -    /* Calling convention requires us to save r4-r11 and lr;
> -     * save also r12 to maintain stack 8-alignment.
> -     */
> -
> +    /* Calling convention requires us to save r4-r11 and lr; save also r12
> +       to maintain stack 8-alignment.  */
>      /* stmdb sp!, { r4 - r12, lr } */
>      tcg_out32(s, (COND_AL << 28) | 0x092d5ff0);
>  
> +    /* ldmia sp!, { r4 - r12, pc } */
> +    tb_pop_ret = (COND_AL << 28) | 0x08bd9ff0;
> +
> +    /* Allocate the local stack frame.  */
> +    tb_frame_size = TCG_STATIC_CALL_ARGS_SIZE;
> +    tb_frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long);
> +    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
> +                   TCG_REG_CALL_STACK, tb_frame_size, 1);
> +    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
> +                  CPU_TEMP_BUF_NLONGS * sizeof(long));
> +
>      tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
>  
>      tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
> -
> -    /* ldmia sp!, { r4 - r12, pc } */
> -    tb_pop_ret = (COND_AL << 28) | 0x08bd9ff0;
>  }
> -- 
> 1.8.1.4
> 
> 
>

Patch

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index fb8c96d..70f63cb 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1074,65 +1074,35 @@  static const void * const qemu_st_helpers[4] = {
  * argreg is where we want to put this argument, arg is the argument itself.
  * Return value is the updated argreg ready for the next call.
  * Note that argreg 0..3 is real registers, 4+ on stack.
- * When we reach the first stacked argument, we allocate space for it
- * and the following stacked arguments using "str r8, [sp, #-0x10]!".
- * Following arguments are filled in with "str r8, [sp, #0xNN]".
- * For more than 4 stacked arguments we'd need to know how much
- * space to allocate when we pushed the first stacked argument.
- * We don't need this, so don't implement it (and will assert if you try it.)
  *
  * We provide routines for arguments which are: immediate, 32 bit
  * value in register, 16 and 8 bit values in register (which must be zero
  * extended before use) and 64 bit value in a lo:hi register pair.
  */
-#define DEFINE_TCG_OUT_ARG(NAME, ARGPARAM)                                 \
-    static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGPARAM)             \
-    {                                                                      \
-        if (argreg < 4) {                                                  \
-            TCG_OUT_ARG_GET_ARG(argreg);                                   \
-        } else if (argreg == 4) {                                          \
-            TCG_OUT_ARG_GET_ARG(TCG_REG_TMP);                              \
-            tcg_out32(s, (COND_AL << 28) | 0x052d0010 | (TCG_REG_TMP << 12)); \
-        } else {                                                           \
-            assert(argreg < 8);                                            \
-            TCG_OUT_ARG_GET_ARG(TCG_REG_TMP);                              \
-            tcg_out_st32_12(s, COND_AL, TCG_REG_TMP, TCG_REG_CALL_STACK,   \
-                            (argreg - 4) * 4);                             \
-        }                                                                  \
-        return argreg + 1;                                                 \
-    }
-
-#define TCG_OUT_ARG_GET_ARG(A) tcg_out_dat_imm(s, COND_AL, ARITH_MOV, A, 0, arg)
-DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t arg)
-#undef TCG_OUT_ARG_GET_ARG
-#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext8u(s, COND_AL, A, arg)
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg arg)
-#undef TCG_OUT_ARG_GET_ARG
-#define TCG_OUT_ARG_GET_ARG(A) tcg_out_ext16u(s, COND_AL, A, arg)
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg arg)
-#undef TCG_OUT_ARG_GET_ARG
-
-/* We don't use the macro for this one to avoid an unnecessary reg-reg
- * move when storing to the stack.
- */
-static TCGReg tcg_out_arg_reg32(TCGContext *s, TCGReg argreg, TCGReg arg)
-{
-    if (argreg < 4) {
-        tcg_out_mov_reg(s, COND_AL, argreg, arg);
-    } else if (argreg == 4) {
-        /* str arg, [sp, #-0x10]! */
-        tcg_out32(s, (COND_AL << 28) | 0x052d0010 | (arg << 12));
-    } else {
-        assert(argreg < 8);
-        /* str arg, [sp, #0xNN] */
-        tcg_out32(s, (COND_AL << 28) | 0x058d0000 |
-                  (arg << 12) | (argreg - 4) * 4);
-    }
-    return argreg + 1;
-}
-
-static inline TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
-                                       TCGReg arglo, TCGReg arghi)
+#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)                \
+static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)              \
+{                                                                          \
+    if (argreg < 4) {                                                      \
+        MOV_ARG(s, COND_AL, argreg, arg);                                  \
+    } else {                                                               \
+        int ofs = (argreg - 4) * 4;                                        \
+        EXT_ARG;                                                           \
+        assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);                      \
+        tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs);         \
+    }                                                                      \
+    return argreg + 1;                                                     \
+}
+
+DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
+    (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
+DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
+    (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
+DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
+    (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
+DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
+
+static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
+                                TCGReg arglo, TCGReg arghi)
 {
     /* 64 bit arguments must go in even/odd register pairs
      * and in 8-aligned stack slots.
@@ -1144,16 +1114,7 @@  static inline TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
     argreg = tcg_out_arg_reg32(s, argreg, arghi);
     return argreg;
 }
-
-static inline void tcg_out_arg_stacktidy(TCGContext *s, TCGReg argreg)
-{
-    /* Output any necessary post-call cleanup of the stack */
-    if (argreg > 4) {
-        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R13, TCG_REG_R13, 0x10);
-    }
-}
-
-#endif
+#endif /* SOFTMMU */
 
 #define TLB_SHIFT	(CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
 
@@ -1284,7 +1245,6 @@  static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 #endif
     argreg = tcg_out_arg_imm32(s, argreg, mem_index);
     tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]);
-    tcg_out_arg_stacktidy(s, argreg);
 
     switch (opc) {
     case 0 | 4:
@@ -1502,7 +1462,6 @@  static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 
     argreg = tcg_out_arg_imm32(s, argreg, mem_index);
     tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]);
-    tcg_out_arg_stacktidy(s, argreg);
 
     reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr);
 #else /* !CONFIG_SOFTMMU */
@@ -1560,6 +1519,7 @@  static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 }
 
 static uint32_t tb_pop_ret;
+static uint32_t tb_frame_size;
 
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                 const TCGArg *args, const int *const_args)
@@ -1570,14 +1530,16 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
     switch (opc) {
     case INDEX_op_exit_tb:
         a0 = args[0];
+        tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
+                       TCG_REG_CALL_STACK, tb_frame_size, 1);
         if (use_armv7_instructions || check_fit_imm(a0)) {
-            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
+            tcg_out_movi32(s, COND_AL, TCG_REG_R0, a0);
             tcg_out32(s, tb_pop_ret);
         } else {
             /* pc is always current address + 8, so 0 reads the word.  */
             tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
             tcg_out32(s, tb_pop_ret);
-            tcg_out32(s, args[0]);
+            tcg_out32(s, a0);
         }
         break;
     case INDEX_op_goto_tb:
@@ -2002,8 +1964,6 @@  static void tcg_target_init(TCGContext *s)
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
 
     tcg_add_target_add_op_defs(arm_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
 
 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
@@ -2032,17 +1992,23 @@  static inline void tcg_out_movi(TCGContext *s, TCGType type,
 
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    /* Calling convention requires us to save r4-r11 and lr;
-     * save also r12 to maintain stack 8-alignment.
-     */
-
+    /* Calling convention requires us to save r4-r11 and lr; save also r12
+       to maintain stack 8-alignment.  */
     /* stmdb sp!, { r4 - r12, lr } */
     tcg_out32(s, (COND_AL << 28) | 0x092d5ff0);
 
+    /* ldmia sp!, { r4 - r12, pc } */
+    tb_pop_ret = (COND_AL << 28) | 0x08bd9ff0;
+
+    /* Allocate the local stack frame.  */
+    tb_frame_size = TCG_STATIC_CALL_ARGS_SIZE;
+    tb_frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long);
+    tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
+                   TCG_REG_CALL_STACK, tb_frame_size, 1);
+    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
+
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
 
     tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
-
-    /* ldmia sp!, { r4 - r12, pc } */
-    tb_pop_ret = (COND_AL << 28) | 0x08bd9ff0;
 }