Patchwork i386: Remove REGPARM

login
register
mail settings
Submitter Blue Swirl
Date Feb. 15, 2012, 6:11 p.m.
Message ID <CAAu8pHvEDw_3mrdYh7xPtHJGcejpdNruWKWZux3WEQECzn228g@mail.gmail.com>
Download mbox | patch
Permalink /patch/141358/
State New
Headers show

Comments

Blue Swirl - Feb. 15, 2012, 6:11 p.m.
Use stack based calling convention (GCC default) for interfacing with
generated code instead of register based convention (regparm(3)).

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---

Performance test results would be appreciated.

Quickly tried on i386 and x86_64 hosts with 32 and 64 bit guests.

---
 osdep.h               |    6 ---
 softmmu_defs.h        |   32 +++++++-------
 softmmu_template.h    |    8 +--
 tcg/i386/tcg-target.c |  112 ++++++++++++++++++++++++------------------------
 tcg/ppc/tcg-target.h  |    2 +-
 tcg/tcg.c             |   14 ------
 tcg/tcg.h             |    7 +---
 7 files changed, 77 insertions(+), 104 deletions(-)
Richard Henderson - Feb. 15, 2012, 7:36 p.m.
On 02/15/2012 10:11 AM, Blue Swirl wrote:
>  #if defined(CONFIG_SOFTMMU)
> -    int mem_index, s_bits, arg_idx;
> +    int mem_index, s_bits;
> +#if TCG_TARGET_REG_BITS == 64
> +    int arg_idx;
> +#else
> +    int stack_adjust;
> +#endif

...

> -    if (TCG_TARGET_REG_BITS == 64) {
> -        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
...
> +#if TCG_TARGET_REG_BITS == 32
> +    tcg_out_pushi(s, mem_index);

Any particular reason you didn't continue with the C-if form,
so that you could avoid the extra ifdeffery at the top there?


r~
Blue Swirl - Feb. 15, 2012, 8:05 p.m.
On Wed, Feb 15, 2012 at 19:36, Richard Henderson <rth@twiddle.net> wrote:
> On 02/15/2012 10:11 AM, Blue Swirl wrote:
>>  #if defined(CONFIG_SOFTMMU)
>> -    int mem_index, s_bits, arg_idx;
>> +    int mem_index, s_bits;
>> +#if TCG_TARGET_REG_BITS == 64
>> +    int arg_idx;
>> +#else
>> +    int stack_adjust;
>> +#endif
>
> ...
>
>> -    if (TCG_TARGET_REG_BITS == 64) {
>> -        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
> ...
>> +#if TCG_TARGET_REG_BITS == 32
>> +    tcg_out_pushi(s, mem_index);
>
> Any particular reason you didn't continue with the C-if form,
> so that you could avoid the extra ifdeffery at the top there?

This code was copied from AREG0 version and there the path for 32 bit
version was not the same. Makes indeed sense here.

Patch

From cc5bcbed5cd398c6c7adfd6f6b4acb077bad6903 Mon Sep 17 00:00:00 2001
Message-Id: <cc5bcbed5cd398c6c7adfd6f6b4acb077bad6903.1329329465.git.blauwirbel@gmail.com>
From: Blue Swirl <blauwirbel@gmail.com>
Date: Wed, 15 Feb 2012 18:02:32 +0000
Subject: [PATCH] i386: Remove REGPARM

Use stack based calling convention (GCC default) for interfacing with
generated code instead of register based convention (regparm(3)).

Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
---
 osdep.h               |    6 ---
 softmmu_defs.h        |   32 +++++++-------
 softmmu_template.h    |    8 +--
 tcg/i386/tcg-target.c |  112 ++++++++++++++++++++++++------------------------
 tcg/ppc/tcg-target.h  |    2 +-
 tcg/tcg.c             |   14 ------
 tcg/tcg.h             |    7 +---
 7 files changed, 77 insertions(+), 104 deletions(-)

diff --git a/osdep.h b/osdep.h
index 432b91e..1fc383b 100644
--- a/osdep.h
+++ b/osdep.h
@@ -73,12 +73,6 @@ 
 #define inline always_inline
 #endif
 
-#ifdef __i386__
-#define REGPARM __attribute((regparm(3)))
-#else
-#define REGPARM
-#endif
-
 #define qemu_printf printf
 
 int qemu_daemon(int nochdir, int noclose);
diff --git a/softmmu_defs.h b/softmmu_defs.h
index c5a2bcd..d47d30d 100644
--- a/softmmu_defs.h
+++ b/softmmu_defs.h
@@ -9,22 +9,22 @@ 
 #ifndef SOFTMMU_DEFS_H
 #define SOFTMMU_DEFS_H
 
-uint8_t REGPARM __ldb_mmu(target_ulong addr, int mmu_idx);
-void REGPARM __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
-uint16_t REGPARM __ldw_mmu(target_ulong addr, int mmu_idx);
-void REGPARM __stw_mmu(target_ulong addr, uint16_t val, int mmu_idx);
-uint32_t REGPARM __ldl_mmu(target_ulong addr, int mmu_idx);
-void REGPARM __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx);
-uint64_t REGPARM __ldq_mmu(target_ulong addr, int mmu_idx);
-void REGPARM __stq_mmu(target_ulong addr, uint64_t val, int mmu_idx);
+uint8_t __ldb_mmu(target_ulong addr, int mmu_idx);
+void __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
+uint16_t __ldw_mmu(target_ulong addr, int mmu_idx);
+void __stw_mmu(target_ulong addr, uint16_t val, int mmu_idx);
+uint32_t __ldl_mmu(target_ulong addr, int mmu_idx);
+void __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx);
+uint64_t __ldq_mmu(target_ulong addr, int mmu_idx);
+void __stq_mmu(target_ulong addr, uint64_t val, int mmu_idx);
 
-uint8_t REGPARM __ldb_cmmu(target_ulong addr, int mmu_idx);
-void REGPARM __stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx);
-uint16_t REGPARM __ldw_cmmu(target_ulong addr, int mmu_idx);
-void REGPARM __stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx);
-uint32_t REGPARM __ldl_cmmu(target_ulong addr, int mmu_idx);
-void REGPARM __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
-uint64_t REGPARM __ldq_cmmu(target_ulong addr, int mmu_idx);
-void REGPARM __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
+uint8_t __ldb_cmmu(target_ulong addr, int mmu_idx);
+void __stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx);
+uint16_t __ldw_cmmu(target_ulong addr, int mmu_idx);
+void __stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx);
+uint32_t __ldl_cmmu(target_ulong addr, int mmu_idx);
+void __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
+uint64_t __ldq_cmmu(target_ulong addr, int mmu_idx);
+void __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
 
 #endif
diff --git a/softmmu_template.h b/softmmu_template.h
index 97020f8..40fcf58 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -89,8 +89,7 @@  static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
 }
 
 /* handle all cases except unaligned access which span two pages */
-DATA_TYPE REGPARM glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                      int mmu_idx)
+DATA_TYPE glue(glue(__ld, SUFFIX), MMUSUFFIX)(target_ulong addr, int mmu_idx)
 {
     DATA_TYPE res;
     int index;
@@ -232,9 +231,8 @@  static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
 #endif /* SHIFT > 2 */
 }
 
-void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
-                                                 DATA_TYPE val,
-                                                 int mmu_idx)
+void glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr, DATA_TYPE val,
+                                         int mmu_idx)
 {
     target_phys_addr_t ioaddr;
     unsigned long addend;
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index dc81572..c07da7f 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -116,17 +116,7 @@  static inline int tcg_target_get_call_iarg_regs_count(int flags)
         return 6;
     }
 
-    flags &= TCG_CALL_TYPE_MASK;
-    switch(flags) {
-    case TCG_CALL_TYPE_STD:
-        return 0;
-    case TCG_CALL_TYPE_REGPARM_1:
-    case TCG_CALL_TYPE_REGPARM_2:
-    case TCG_CALL_TYPE_REGPARM:
-        return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
-    default:
-        tcg_abort();
-    }
+    return 0;
 }
 
 /* parse target specific constraints */
@@ -1148,7 +1138,12 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     int data_reg, data_reg2 = 0;
     int addrlo_idx;
 #if defined(CONFIG_SOFTMMU)
-    int mem_index, s_bits, arg_idx;
+    int mem_index, s_bits;
+#if TCG_TARGET_REG_BITS == 64
+    int arg_idx;
+#else
+    int stack_adjust;
+#endif
     uint8_t *label_ptr[3];
 #endif
 
@@ -1184,16 +1179,33 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     }
 
     /* XXX: move that code at the end of the TB */
+#if TCG_TARGET_REG_BITS == 32
+    tcg_out_pushi(s, mem_index);
+    stack_adjust = 4;
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_push(s, args[addrlo_idx + 1]);
+        stack_adjust += 4;
+    }
+    tcg_out_push(s, args[addrlo_idx]);
+    stack_adjust += 4;
+#else
     /* The first argument is already loaded with addrlo.  */
     arg_idx = 1;
-    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
-                    args[addrlo_idx + 1]);
-    }
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                  mem_index);
+#endif
+
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
 
+#if TCG_TARGET_REG_BITS == 32
+    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
+        /* Pop and discard.  This is 2 bytes smaller than the add.  */
+        tcg_out_pop(s, TCG_REG_ECX);
+    } else if (stack_adjust != 0) {
+        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
+    }
+#endif
+
     switch(opc) {
     case 0 | 4:
         tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
@@ -1359,45 +1371,27 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     }
 
     /* XXX: move that code at the end of the TB */
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                    TCG_REG_RSI, data_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
-        stack_adjust = 0;
-    } else if (TARGET_LONG_BITS == 32) {
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
-        if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
-            tcg_out_pushi(s, mem_index);
-            stack_adjust = 4;
-        } else {
-            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
-            stack_adjust = 0;
-        }
-    } else {
-        if (opc == 3) {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
-            tcg_out_pushi(s, mem_index);
-            tcg_out_push(s, data_reg2);
-            tcg_out_push(s, data_reg);
-            stack_adjust = 12;
-        } else {
-            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
-            switch(opc) {
-            case 0:
-                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
-                break;
-            case 1:
-                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
-                break;
-            case 2:
-                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
-                break;
-            }
-            tcg_out_pushi(s, mem_index);
-            stack_adjust = 4;
-        }
+#if TCG_TARGET_REG_BITS == 32
+    tcg_out_pushi(s, mem_index);
+    stack_adjust = 4;
+    if (opc == 3) {
+        tcg_out_push(s, data_reg2);
+        stack_adjust += 4;
+    }
+    tcg_out_push(s, data_reg);
+    stack_adjust += 4;
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_push(s, args[addrlo_idx + 1]);
+        stack_adjust += 4;
     }
+    tcg_out_push(s, args[addrlo_idx]);
+    stack_adjust += 4;
+#else
+    tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
+                TCG_REG_RSI, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
+    stack_adjust = 0;
+#endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
 
@@ -1962,9 +1956,15 @@  static void tcg_target_qemu_prologue(TCGContext *s)
         tcg_out_push(s, tcg_target_callee_save_regs[i]);
     }
 
-    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
-
+#if TCG_TARGET_REG_BITS == 32
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
+               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
+    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
+               (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
+#else
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+#endif
+    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
 
     /* jmp *tb.  */
     tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 3f22aaa..2f37fd2 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -98,5 +98,5 @@  typedef enum {
 #define TCG_TARGET_HAS_GUEST_BASE
 
 #define tcg_qemu_tb_exec(env, tb_ptr) \
-    ((long REGPARM __attribute__ ((longcall)) \
+    ((long __attribute__ ((longcall)) \
       (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index d43fa4a..44435cb 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -602,9 +602,6 @@  void tcg_register_helper(void *func, const char *name)
 void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
                    int sizemask, TCGArg ret, int nargs, TCGArg *args)
 {
-#if defined(TCG_TARGET_I386) && TCG_TARGET_REG_BITS < 64
-    int call_type;
-#endif
     int i;
     int real_args;
     int nb_rets;
@@ -629,9 +626,6 @@  void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
 
     *gen_opc_ptr++ = INDEX_op_call;
     nparam = gen_opparam_ptr++;
-#if defined(TCG_TARGET_I386) && TCG_TARGET_REG_BITS < 64
-    call_type = (flags & TCG_CALL_TYPE_MASK);
-#endif
     if (ret != TCG_CALL_DUMMY_ARG) {
 #if TCG_TARGET_REG_BITS < 64
         if (sizemask & 1) {
@@ -657,14 +651,6 @@  void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
 #if TCG_TARGET_REG_BITS < 64
         int is_64bit = sizemask & (1 << (i+1)*2);
         if (is_64bit) {
-#ifdef TCG_TARGET_I386
-            /* REGPARM case: if the third parameter is 64 bit, it is
-               allocated on the stack */
-            if (i == 2 && call_type == TCG_CALL_TYPE_REGPARM) {
-                call_type = TCG_CALL_TYPE_REGPARM_2;
-                flags = (flags & ~TCG_CALL_TYPE_MASK) | call_type;
-            }
-#endif
 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
             /* some targets want aligned 64 bit args */
             if (real_args & 1) {
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 5c28239..9fb2f73 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -252,11 +252,6 @@  typedef int TCGv_i64;
 #define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1)
 
 /* call flags */
-#define TCG_CALL_TYPE_MASK      0x000f
-#define TCG_CALL_TYPE_STD       0x0000 /* standard C call */
-#define TCG_CALL_TYPE_REGPARM_1 0x0001 /* i386 style regparm call (1 reg) */
-#define TCG_CALL_TYPE_REGPARM_2 0x0002 /* i386 style regparm call (2 regs) */
-#define TCG_CALL_TYPE_REGPARM   0x0003 /* i386 style regparm call (3 regs) */
 /* A pure function only reads its arguments and TCG global variables
    and cannot raise exceptions. Hence a call to a pure function can be
    safely suppressed if the return value is not used. */
@@ -589,5 +584,5 @@  extern uint8_t code_gen_prologue[];
 /* TCG targets may use a different definition of tcg_qemu_tb_exec. */
 #if !defined(tcg_qemu_tb_exec)
 # define tcg_qemu_tb_exec(env, tb_ptr) \
-    ((long REGPARM (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
+    ((long (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
 #endif
-- 
1.7.2.5